Brussels. Step 2.a Dynamic Sampling Model and GREGWT¶

In [1]:

# Print the wall-clock time at which this notebook was executed.
import datetime

print(datetime.datetime.now())

2018-04-03 15:01:31.465870

Notebook abstract

This notebook shows the main sampling and reweighting algorithm.

Import libraries¶

In [2]:

from smum.microsim.run import run_calibrated_model
from smum.microsim.table import TableModel

/usr/lib/python3.6/site-packages/h5py-2.7.1-py3.6-linux-x86_64.egg/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

Global variables¶

In [3]:

# --- Simulation settings (forwarded to run_calibrated_model) ---
year = 2016        # also selects the year of the model tables used to build the formulas
iterations = 1000  # also appended to the run name
njobs = 4          # The number of chains to run in parallel.
verbose = False    # also passed to TableModel

# --- Inputs and naming ---
# Benchmark/census CSV; given to both TableModel and run_calibrated_model.
census_file = 'data/benchmarks_be_year_bias3_climate.csv'
# Passed as `project=` to run_calibrated_model and embedded in the model name.
typ = 'resampled'
model_name = 'Brussels_Electricity_Water_projected_dynamic_{}_bias'.format(typ)

Define Table model¶

In [4]:

# Create the table model that the Water and Electricity models are added to below.
tm = TableModel(
    census_file=census_file,
    verbose=verbose,
)

Water model¶

In [5]:

# Load the Water model table and register it under the name 'Water'.
tm.add_model('data/table_water.csv', 'Water')

# Configure the Water covariates one by one; call order is kept as in the
# original cell.
tm.update_dynamic_model('Water', specific_col='ConstructionType', select=1)
# NOTE(review): compute_average is 0 here but False for Income below —
# confirm the library treats the two identically.
tm.update_dynamic_model('Water', specific_col='Age', val='mu', compute_average=0)
tm.update_dynamic_model('Water', specific_col='ConstructionYear', val='mu')
tm.update_dynamic_model('Water', specific_col='HHSize', val='mu')
tm.update_dynamic_model('Water', specific_col='Income', val='mu', compute_average=False)

In [6]:

# Display the Water model parameters for 2020.
# Note: this is not the simulated year; `year` is set to 2016 in the
# global-variables cell.
tm.models['Water'].loc[2020]

Out[6]:

	co_mu	co_sd	p	mu	sd	dis	ub	lb
w_Intercept	NaN	NaN	-5.28241	NaN	NaN	Deterministic	inf	-inf
w_ConstructionType	1.15175	0.111739	0.825655	NaN	NaN	Bernoulli	inf	0
w_Age	0.000515922	0.00169118	NaN	56.0519	23.2562	Normal	85	20
w_ConstructionYear	0.0156761	0.00115888	NaN	1957.4	40.6553	Poisson	2035	1800
w_HHSize	10.3606	0.272889	NaN	3.11831	1.71888	Poisson	8	1
w_Income	0.000956665	1.27204e-05	NaN	13648.4	136.484	Gamma	inf	0

In [7]:

# Build the Water formula as a "+"-joined sum of `c_<var>*<var>` terms, one
# per variable of the Water model for the configured `year`, leaving out the
# intercept.
#
# The formula is registered by the `tm.add_formula(formula_water, 'Water')`
# call in the following cell, so it is not registered here as well; the
# original cell registered it a second time. This mirrors the Electricity
# section, where building and registering are separate cells.
formula_water = "+".join(
    "c_{0}*{0}".format(e)
    for e in tm.models['Water'][year].index
    if e not in ['w_Intercept']
)

In [8]:

# Register the Water formula string on the table model under the name 'Water'.
tm.add_formula(formula_water, 'Water')

In [9]:

# Print the registered Water formula, one term per line, for visual checking.
tm.print_formula('Water')

Water =
         c_w_ConstructionType*w_ConstructionType +
         c_w_Age*w_Age +
         c_w_ConstructionYear*w_ConstructionYear +
         c_w_HHSize*w_HHSize +
         c_w_Income*w_Income +

Electricity model¶

In [10]:

# Load the Electricity model table and register it under the name
# 'Electricity', passing the listed columns as `skip_cols`.
# NOTE(review): the first four skipped columns are the ones whose formula
# terms reuse the Water variables (w_*) further down — presumably that is why
# they are skipped here; confirm against the smum documentation.
tm.add_model(
    'data/table_elec.csv',
    'Electricity',
    skip_cols=[
        'ConstructionType',
        'Income',
        'HHSize',
        'ConstructionYear',
        'ELWARM',
        'ELWATER',
        'ELFOOD',
    ],
)

# Floor area uses the 'mu' values.
tm.update_dynamic_model('Electricity', specific_col='sqm', val='mu', compute_average=False)
# Cooling and heating degree days are updated with static=True.
tm.update_dynamic_model('Electricity', specific_col='CDD', static=True, compute_average=False)
tm.update_dynamic_model('Electricity', specific_col='HDD', static=True, compute_average=False)

In [11]:

# Display the Electricity model parameters for the simulated year.
# Uses the `year` global (2016 in the configuration cell) instead of repeating
# the literal, so this view follows the configuration if the year is changed.
tm.models['Electricity'].loc[year]

Out[11]:

	co_mu	co_sd	p	mu	sd	dis	ub	lb
e_Intercept	NaN	NaN	-29960.6	NaN	NaN	Deterministic	inf	-inf
e_ConstructionType	2752.5	138.89	NaN	NaN	NaN	None	inf	0
e_sqm	16.7486	0.524321	NaN	73.0045	0.730045	Normal	inf	0
e_CDD	1.71525	0.0888685	833.3	NaN	NaN	Deterministic	inf	0
e_HDD	0.198506	0.0406247	3006.5	NaN	NaN	Deterministic	inf	0
e_Income	0.0322898	0.00266627	NaN	NaN	NaN	None	inf	0
e_HHSize	1037.73	36.3344	NaN	NaN	NaN	None	8	1
e_ConstructionYear	12.9729	2.21459	NaN	NaN	NaN	None	2035	1800

In [12]:

# Electricity variables that do not get an automatic `c_<var>*<var>` term.
skip_elec = [
    'e_Intercept',
    'e_ConstructionType',
    'e_Income',
    'e_HHSize',
    'e_ConstructionYear',
    'e_CDD',
    'e_HDD',
]

# Automatic part: one `c_<var>*<var>` term for every remaining variable of the
# Electricity model for the configured `year`.
formula_elec = "+".join(
    "c_{0}*{0}".format(e)
    for e in tm.models['Electricity'][year].index
    if e not in skip_elec
)

# Manual part: the electricity coefficients (c_e_*) are multiplied by the
# Water model's variables (w_*), and the two degree-day terms are appended.
# Adjacent literals are concatenated into one string without newlines, exactly
# like the original backslash-continued literal.
# NOTE(review): e_CDD and e_HDD enter without a c_e_* coefficient — confirm
# this is intended.
formula_elec += (
    '+c_e_ConstructionType*w_ConstructionType +'
    'c_e_Income*w_Income +'
    'c_e_HHSize*w_HHSize +'
    'c_e_ConstructionYear*w_ConstructionYear+'
    'e_CDD +'
    'e_HDD'
)

In [13]:

# Register the Electricity formula string on the table model under the name 'Electricity'.
tm.add_formula(formula_elec, 'Electricity')

In [14]:

# Print the registered Electricity formula, one term per line, for visual checking.
tm.print_formula('Electricity')

Electricity =
         c_e_sqm*e_sqm +
         c_e_ConstructionType*w_ConstructionType  +
         c_e_Income*w_Income  +
         c_e_HHSize*w_HHSize  +
         c_e_ConstructionYear*w_ConstructionYear +
         e_CDD  +
         e_HDD +

Make model and save it to Excel¶

In [15]:

# Build the combined model from the registered Water and Electricity models;
# the result is the first argument to run_calibrated_model below.
table_model = tm.make_model()

In [16]:

# Write one Excel file per model; the "_be" suffix appears in the file names
# (data/tableModel_<name>_be.xlsx, see the output below).
# `sufix` (sic) is the keyword as spelled in this call — do not "correct" it
# without checking the library signature.
tm.to_excel(sufix = "_be")

creating data/tableModel_Water_be.xlsx
creating data/tableModel_Electricity_be.xlsx

Define model variables¶

In [17]:

# Age categories: bin edges, and one label per bin named after its upper edge.
cut_age = [17, 24, 29, 39, 54, 64, 79, 120]
labels_age = ['Age_{}'.format(upper) for upper in cut_age[1:]]

# Construction-year categories: bin edges and one label per bin.
# Labels follow the upper edge of each bin, except the last bin, whose upper
# edge is 2100 but whose label is ConstructionYear_2035.
cut_cy = [
    0,
    1900, 1918, 1945, 1961, 1970, 1981,
    1991, 2001, 2011, 2016, 2020, 2030,
    2100,
]
labels_cy = [
    'ConstructionYear_{}'.format(upper)
    for upper in cut_cy[1:-1] + [2035]
]

# Variable name -> [bin edges, bin labels]; passed to run_calibrated_model
# as `to_cat`.
to_cat = dict(
    w_Age=[cut_age, labels_age],
    w_ConstructionYear=[cut_cy, labels_cy],
)

# Electricity columns passed to run_calibrated_model as `drop_col_survey`.
drop_col_survey = [
    'e_' + name
    for name in (
        'ConstructionType', 'Income', 'HHSize', 'ConstructionYear',
        'HDD', 'CDD',
    )
]

In [ ]:

# Run the calibrated model on the combined table model.
# The run name combines the model name with the iteration count.
fw = run_calibrated_model(
    table_model,
    # identification of the run
    name='{}_{}'.format(model_name, iterations),
    project=typ,
    year=year,
    # inputs
    census_file=census_file,
    to_cat=to_cat,
    drop_col_survey=drop_col_survey,
    # sampling settings
    iterations=iterations,
    njobs=njobs,
    verbose=verbose,
)