Brussels. Step 2.a Dynamic Sampling Model and GREGWT

In [1]:
# Print the current timestamp so the saved output shows when the cell was run.
import datetime
print(datetime.datetime.now())
2018-04-03 15:01:31.465870

Notebook abstract

This notebook shows the main sampling and reweighting algorithm.

Import libraries

In [2]:
from smum.microsim.run import run_calibrated_model
from smum.microsim.table import TableModel
/usr/lib/python3.6/site-packages/h5py-2.7.1-py3.6-linux-x86_64.egg/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

Global variables

In [3]:
# --- Run configuration shared by the cells below ---

# Project type; also embedded in the model name.
typ = 'resampled'
model_name = 'Brussels_Electricity_Water_projected_dynamic_{}_bias'.format(typ)

# Input benchmark file and the year used to index the table models.
census_file = 'data/benchmarks_be_year_bias3_climate.csv'
year = 2016

# Sampling settings handed to run_calibrated_model.
iterations = 1000
# The number of chains to run in parallel.
njobs = 4

verbose = False

Define Table model

In [4]:
# Create the table model from the benchmark file configured above.
tm = TableModel(
    census_file=census_file,
    verbose=verbose,
)

Water model

In [5]:
# Load the water table and register it under the name 'Water'.
tm.add_model('data/table_water.csv', 'Water')

# Update the Water model column by column (same order as before).
# NOTE(review): 'Age' passes compute_average=0 while 'Income' passes
# compute_average=False -- confirm both are meant to disable averaging.
tm.update_dynamic_model('Water', specific_col='ConstructionType', select=1)
tm.update_dynamic_model('Water', specific_col='Age', val='mu', compute_average=0)
tm.update_dynamic_model('Water', specific_col='ConstructionYear', val='mu')
tm.update_dynamic_model('Water', specific_col='HHSize', val='mu')
tm.update_dynamic_model('Water', specific_col='Income', val='mu', compute_average=False)
In [6]:
# Display the Water parameter table for 2020.
# NOTE(review): the run itself uses `year` (2016); 2020 here looks like a
# spot check of a projected year -- confirm this is intentional.
tm.models['Water'].loc[2020]
Out[6]:
co_mu co_sd p mu sd dis ub lb
w_Intercept NaN NaN -5.28241 NaN NaN Deterministic inf -inf
w_ConstructionType 1.15175 0.111739 0.825655 NaN NaN Bernoulli inf 0
w_Age 0.000515922 0.00169118 NaN 56.0519 23.2562 Normal 85 20
w_ConstructionYear 0.0156761 0.00115888 NaN 1957.4 40.6553 Poisson 2035 1800
w_HHSize 10.3606 0.272889 NaN 3.11831 1.71888 Poisson 8 1
w_Income 0.000956665 1.27204e-05 NaN 13648.4 136.484 Gamma inf 0
In [7]:
# Build the Water formula as a '+'-joined list of `c_<name>*<name>` terms,
# one per row of the Water table for `year`, leaving out the intercept row.
# The formula is only built here; it is registered on the table model by the
# `tm.add_formula(formula_water, 'Water')` call in the following cell, so it
# is no longer added twice.
formula_water = "+".join(
    "c_{0}*{0}".format(name)
    for name in tm.models['Water'][year].index
    if name != 'w_Intercept'
)
In [8]:
# Register the Water formula string on the table model under the name 'Water'.
tm.add_formula(formula_water, 'Water')
In [9]:
# Print the registered Water formula, one term per line, as a sanity check.
tm.print_formula('Water')
Water =
         c_w_ConstructionType*w_ConstructionType +
         c_w_Age*w_Age +
         c_w_ConstructionYear*w_ConstructionYear +
         c_w_HHSize*w_HHSize +
         c_w_Income*w_Income +

Electricity model

In [10]:
# Columns passed to add_model's `skip_cols` option.
# NOTE(review): presumably these are the variables shared with the Water
# model or not sampled for Electricity -- confirm against TableModel.add_model.
electricity_skip_cols = [
    'ConstructionType',
    'Income',
    'HHSize',
    'ConstructionYear',
    'ELWARM',
    'ELWATER',
    'ELFOOD',
]

# Load the electricity table and register it under the name 'Electricity'.
tm.add_model(
    'data/table_elec.csv', 'Electricity',
    skip_cols=electricity_skip_cols)

# Floor area is updated through its 'mu' value.
tm.update_dynamic_model(
    'Electricity', specific_col='sqm', val='mu', compute_average=False)

# Cooling and heating degree-day columns get the same static update,
# applied in the original order (CDD first, then HDD).
for degree_day_col in ('CDD', 'HDD'):
    tm.update_dynamic_model(
        'Electricity', specific_col=degree_day_col,
        static=True, compute_average=False)
In [11]:
# Display the Electricity parameter table for 2016 (the same value as `year`).
tm.models['Electricity'].loc[2016]
Out[11]:
co_mu co_sd p mu sd dis ub lb
e_Intercept NaN NaN -29960.6 NaN NaN Deterministic inf -inf
e_ConstructionType 2752.5 138.89 NaN NaN NaN None inf 0
e_sqm 16.7486 0.524321 NaN 73.0045 0.730045 Normal inf 0
e_CDD 1.71525 0.0888685 833.3 NaN NaN Deterministic inf 0
e_HDD 0.198506 0.0406247 3006.5 NaN NaN Deterministic inf 0
e_Income 0.0322898 0.00266627 NaN NaN NaN None inf 0
e_HHSize 1037.73 36.3344 NaN NaN NaN None 8 1
e_ConstructionYear 12.9729 2.21459 NaN NaN NaN None 2035 1800
In [12]:
# Rows of the Electricity table that do not get an automatic
# `c_<name>*<name>` term; they are appended by hand below.
skip_elec = [
    'e_Intercept',
    'e_ConstructionType',
    'e_Income',
    'e_HHSize',
    'e_ConstructionYear',
    'e_CDD',
    'e_HDD',
]

# Automatic terms for every remaining row of the Electricity table for `year`.
formula_elec = "+".join(
    "c_{0}*{0}".format(name)
    for name in tm.models['Electricity'][year].index
    if name not in skip_elec
)

# Hand-written terms: the electricity coefficients are multiplied by the
# Water model's variables (w_*), and e_CDD / e_HDD are added without a
# coefficient. Adjacent literals concatenate to the same single string as
# the original backslash-continued literal.
formula_elec += (
    '+c_e_ConstructionType*w_ConstructionType +'
    'c_e_Income*w_Income +'
    'c_e_HHSize*w_HHSize +'
    'c_e_ConstructionYear*w_ConstructionYear+'
    'e_CDD +'
    'e_HDD'
)
In [13]:
# Register the Electricity formula string on the table model under the name 'Electricity'.
tm.add_formula(formula_elec, 'Electricity')
In [14]:
# Print the registered Electricity formula, one term per line, as a sanity check.
tm.print_formula('Electricity')
Electricity =
         c_e_sqm*e_sqm +
         c_e_ConstructionType*w_ConstructionType  +
         c_e_Income*w_Income  +
         c_e_HHSize*w_HHSize  +
         c_e_ConstructionYear*w_ConstructionYear +
         e_CDD  +
         e_HDD +

Make model and save it to Excel

In [15]:
# Build the model object from the table model `tm`; `table_model` is the
# first argument of the run_calibrated_model call at the end of the notebook.
table_model = tm.make_model()
In [16]:
# Write the table models to Excel files; per the cell output, "_be" ends up
# in the file names (e.g. data/tableModel_Water_be.xlsx).
# `sufix` is the keyword spelling used by this call and is kept as-is.
tm.to_excel(sufix = "_be")
creating data/tableModel_Water_be.xlsx
creating data/tableModel_Electricity_be.xlsx

Define model variables

In [17]:
# Bin edges for age; each label below names the upper edge of its bin.
cut_age = [17, 24, 29, 39, 54, 64, 79, 120]
labels_age = ['Age_{}'.format(upper_edge) for upper_edge in cut_age[1:]]

# Bin edges for construction year.
cut_cy = [
    0,
    1900, 1918, 1945, 1961, 1970, 1981, 1991,
    2001, 2011, 2016, 2020, 2030,
    2100,
]
# Spelled out rather than derived from the edges: the last label (2035)
# does not match its bin edge (2100).
labels_cy = [
    'ConstructionYear_1900',
    'ConstructionYear_1918',
    'ConstructionYear_1945',
    'ConstructionYear_1961',
    'ConstructionYear_1970',
    'ConstructionYear_1981',
    'ConstructionYear_1991',
    'ConstructionYear_2001',
    'ConstructionYear_2011',
    'ConstructionYear_2016',
    'ConstructionYear_2020',
    'ConstructionYear_2030',
    'ConstructionYear_2035',
]

# Variable name -> [bin edges, bin labels]; passed as `to_cat` to
# run_calibrated_model.
to_cat = {
    'w_Age': [cut_age, labels_age],
    'w_ConstructionYear': [cut_cy, labels_cy],
}

# Column names passed as `drop_col_survey` to run_calibrated_model.
drop_col_survey = [
    'e_ConstructionType',
    'e_Income',
    'e_HHSize',
    'e_ConstructionYear',
    'e_HDD',
    'e_CDD',
]
In [ ]:
# Run the calibrated model on the table model built above.
# The run name is the model name followed by the iteration count.
fw = run_calibrated_model(
    table_model,
    name='{}_{}'.format(model_name, iterations),
    project=typ,
    year=year,
    census_file=census_file,
    iterations=iterations,
    njobs=njobs,
    to_cat=to_cat,
    drop_col_survey=drop_col_survey,
    verbose=verbose,
)