Sorsogon. Step 2.a Dynamic Sampling Model and GREGWT¶
In [1]:
# Print the wall-clock time at which this notebook run started.
import datetime

print(datetime.datetime.now())
2018-03-26 01:28:43.554147
Notebook abstract
This notebook defines the income, electricity and water table models and runs the main sampling and reweighting (GREGWT) algorithm.
Import libraries¶
In [2]:
from smum.microsim.run import run_calibrated_model
from smum.microsim.table import TableModel
/usr/lib/python3.6/site-packages/h5py-2.7.1-py3.6-linux-x86_64.egg/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Global variables¶
In [3]:
# --- Run configuration ---
# Iteration count handed to run_calibrated_model and appended to the run name.
iterations = 1000
# Year handed to run_calibrated_model and used to index the table models
# when the formulas are assembled.
benchmark_year = 2016
# CSV file handed to both TableModel and run_calibrated_model.
census_file = 'data/benchmarks_year_bias.csv'
# Label used as the `project` argument and as the suffix of the model name.
typ = 'resampled'
model_name = 'Sorsogon_Electricity_Water_wbias_projected_dynamic_' + typ
verbose = False
# The number of chains to run in parallel.
njobs = 4
Define Table model¶
In [4]:
# Create the TableModel instance that every following cell adds models
# and formulas to; it is given the census file and the verbosity flag.
tm = TableModel(census_file = census_file, verbose=verbose)
Income model¶
In [5]:
# Register the income table under the name 'Income'.
tm.add_model('data/table_inc.csv', 'Income')

# Update three of its variables, one call per column.
tm.update_dynamic_model('Income', specific_col='Education')
# NOTE(review): compute_average is deliberately left as the integer 0 here,
# while the Electricity cell passes False -- confirm whether the library
# distinguishes the two before unifying them.
tm.update_dynamic_model(
    'Income',
    specific_col='FamilySize', specific_col_as='Size',
    val='mu', compute_average=0)
tm.update_dynamic_model(
    'Income',
    specific_col='Age',
    val='mu', compute_average=0)
In [6]:
# Display the Income model entry labelled 2020 as the cell output.
tm.models['Income'].loc[2020]
Out[6]:
co_mu | co_sd | p | mu | sd | dis | ub | lb | |
---|---|---|---|---|---|---|---|---|
i_Intercept | NaN | NaN | 1147.66 | NaN | NaN | Deterministic | NaN | NaN |
i_Sex | 919.012059036333 | 161.50344091572538 | 0.243795 | NaN | NaN | Bernoulli | NaN | NaN |
i_Urbanity | 7105.2244566329355 | 127.94148635675795 | 0.6356 | NaN | NaN | Bernoulli | NaN | NaN |
i_FamilySize | 1666.846395220964 | 29.03482607534048 | NaN | 3.70878 | 1.83794 | Poisson | 10 | 1 |
i_Age | 116.57589770606201 | 4.681393204635 | NaN | 52.5153 | 12.2451 | Normal | 100 | 18 |
i_Education | 1.0,6023.86254599,11959.091528,18727.4606703,1... | 1e-10,140.904404522,217.208790314,282.17614554... | 0.2430379746835443,0.21581625995041107,0.25540... | NaN | NaN | Categorical | NaN | NaN |
In [7]:
# Income formula: the intercept plus one "c_<var> * <var>" term for every
# other variable listed in the benchmark-year Income model.
formula_inc = "i_Intercept+" + "+".join(
    "c_{0} * {0}".format(var)
    for var in tm.models['Income'][benchmark_year].index
    if var != 'i_Intercept'
)
tm.add_formula(formula_inc, 'Income')
In [8]:
# Print the formula registered for the Income model, one term per line.
tm.print_formula('Income')
Income =
i_Intercept +
c_i_Sex * i_Sex +
c_i_Urbanity * i_Urbanity +
c_i_FamilySize * i_FamilySize +
c_i_Age * i_Age +
c_i_Education * i_Education +
Electricity model¶
In [9]:
# Register the electricity table under the name 'Electricity', passing
# 'yes' as the reference category.
tm.add_model('data/table_elec.csv', 'Electricity', reference_cat=['yes'])
# Update the Income column of the Electricity model.
# NOTE(review): compute_average is the boolean False here, unlike the
# integer 0 passed in the Income and Water cells -- confirm this is intended.
tm.update_dynamic_model(
    'Electricity',
    specific_col='Income', val='mu', compute_average=False)
In [10]:
# Display the Electricity model entry labelled 2016 as the cell output.
tm.models['Electricity'].loc[2016]
Out[10]:
co_mu | co_sd | p | mu | sd | dis | ub | lb | |
---|---|---|---|---|---|---|---|---|
e_Intercept | NaN | NaN | 3.29998 | NaN | NaN | Deterministic | NaN | NaN |
e_Lighting | 0.825662 | 18.6676 | 0.946022 | NaN | NaN | Bernoulli | NaN | NaN |
e_TV | 18.7899 | 1.75962 | 0.964932 | NaN | NaN | Bernoulli | NaN | NaN |
e_Cooking | 28.8862 | 1.96894 | 0.0142662 | NaN | NaN | Bernoulli | NaN | NaN |
e_Refrigeration | 59.2432 | 1.55605 | 0.602102 | NaN | NaN | Bernoulli | NaN | NaN |
e_AC | 203.323 | 3.13016 | 0.256521 | NaN | NaN | Bernoulli | NaN | NaN |
e_Urban | 24.5935 | 1.39104 | 1 | NaN | NaN | Bernoulli | NaN | NaN |
e_Income | 0.00142607 | 4.10201e-05 | NaN | 190472 | 1904.72 | None | inf | 0 |
In [11]:
# Electricity formula: the intercept plus one "c_<var> * <var>" term for
# every benchmark-year variable, except the three handled separately.
# A membership test replaces the chained bitwise `&` comparisons; the
# resulting string is unchanged.
formula_elec = "e_Intercept+" + "+".join(
    "c_{0} * {0}".format(var)
    for var in tm.models['Electricity'][benchmark_year].index
    if var not in ('e_Intercept', 'e_Income', 'e_Urban')
)
# The urban coefficient is applied to the i_Urbanity variable instead of
# e_Urban.
formula_elec += '+c_e_Urban * i_Urbanity'
# The income coefficient is applied to the variable named Income.
formula_elec += '+c_e_Income * Income'
In [12]:
# Register the assembled formula string for the Electricity model.
tm.add_formula(formula_elec, 'Electricity')
In [13]:
# Print the formula registered for the Electricity model, one term per line.
tm.print_formula('Electricity')
Electricity =
e_Intercept +
c_e_Lighting * e_Lighting +
c_e_TV * e_TV +
c_e_Cooking * e_Cooking +
c_e_Refrigeration * e_Refrigeration +
c_e_AC * e_AC +
c_e_Urban * i_Urbanity +
c_e_Income * Income +
Water model¶
In [14]:
# Register the water table under the name 'Water'.
tm.add_model('data/table_water.csv', 'Water')

# Update three of its variables, with the same arguments as the Income model.
tm.update_dynamic_model('Water', specific_col='Education')
# NOTE(review): compute_average is deliberately left as the integer 0 here,
# while the Electricity cell passes False -- confirm whether the library
# distinguishes the two before unifying them.
tm.update_dynamic_model(
    'Water',
    specific_col='FamilySize', specific_col_as='Size',
    val='mu', compute_average=0)
tm.update_dynamic_model(
    'Water',
    specific_col='Age',
    val='mu', compute_average=0)
In [15]:
# Display the Water model entry labelled 2020 as the cell output.
tm.models['Water'].loc[2020]
Out[15]:
co_mu | co_sd | p | dis | mu | sd | ub | lb | |
---|---|---|---|---|---|---|---|---|
w_Intercept | NaN | NaN | -601.592 | Deterministic | NaN | NaN | NaN | NaN |
w_Sex | 98.49504620801835 | 29.44380722589748 | 0.243795 | None | NaN | NaN | NaN | NaN |
w_Urbanity | 1000.9789077676428 | 25.415910606032206 | 0.6356 | None | NaN | NaN | NaN | NaN |
w_Total_Family_Income | 0.05318701200857999 | 0.0009823058551951082 | NaN | None | NaN | NaN | NaN | NaN |
w_FamilySize | 49.73935151831777 | 5.897790558149098 | NaN | None | 3.70878 | 1.83794 | NaN | NaN |
w_Age | 6.088941881654669 | 0.9127405886772298 | NaN | None | 52.5153 | 12.2451 | NaN | NaN |
w_Education | 1.0,214.4011453125436,260.32727427717964,101.7... | 1e-10,28.815802440470176,40.0574490885231,49.9... | 0.2430379746835443,0.21581625995041107,0.25540... | None;i;Categorical | NaN | NaN | NaN | NaN |
In [16]:
# Water formula: the intercept plus one term per benchmark-year variable.
# Each "c_w_<name>" coefficient multiplies "i_<name>", i.e. the variable
# name with everything before its first underscore replaced by "i".
# A membership test replaces the chained bitwise `&` comparisons; the
# resulting string is unchanged.
formula_water = "w_Intercept+" + "+".join(
    "c_{0} * i_{1}".format(var, var.partition('_')[2])
    for var in tm.models['Water'][benchmark_year].index
    if var not in ('w_Intercept', 'w_Total_Family_Income', 'w_Education')
)
# The two excluded variables are appended explicitly, after the rest.
formula_water += '+c_w_Total_Family_Income*Income'
formula_water += '+c_w_Education*i_Education'
In [17]:
# Register the assembled formula string for the Water model.
tm.add_formula(formula_water, 'Water')
In [18]:
# Print the formula registered for the Water model, one term per line.
tm.print_formula('Water')
Water =
w_Intercept +
c_w_Sex * i_Sex +
c_w_Urbanity * i_Urbanity +
c_w_FamilySize * i_FamilySize +
c_w_Age * i_Age +
c_w_Total_Family_Income*Income +
c_w_Education*i_Education +
Make model and save it to Excel¶
In [19]:
# Call make_model() and keep the result; it is passed as the first
# argument to run_calibrated_model later in the notebook.
table_model = tm.make_model()
In [20]:
# Export the table models; the recorded output lists one
# data/tableModel_<name>.xlsx file per model.
tm.to_excel()
creating data/tableModel_Income.xlsx
creating data/tableModel_Electricity.xlsx
creating data/tableModel_Water.xlsx
Define model variables¶
In [21]:
# Age-bin edges. Each label is named after a pair of consecutive edges:
# "age_<edge>_<next edge - 1>", e.g. edges 19 and 26 give "age_19_25".
cut = [0, 19, 26, 36, 46, 56, 66, 76, 86, 101]
labels = ['age_{}_{}'.format(low, high - 1)
          for low, high in zip(cut, cut[1:])]
# Passed to run_calibrated_model as `to_cat`: the i_Age variable paired
# with its edges and labels.
to_cat = {'i_Age': [cut, labels]}
# Passed to run_calibrated_model as `drop_col_survey`.
drop_col_survey = [
    'e_Income',
    'e_Urban',
    'w_Total_Family_Income',
    'w_Education',
]
In [ ]:
# Run the calibrated model with the settings defined in the earlier cells
# and keep the returned value in `fw`.
# Optional `rep` arguments that are currently not passed:
#   rep = {'FamilySize': ['Size']}
#   rep = {'urb': ['urban', 'urbanity']}
fw = run_calibrated_model(
    table_model,
    # Run identification.
    name='{}_{}'.format(model_name, iterations),
    project=typ,
    # Benchmark data.
    census_file=census_file,
    year=benchmark_year,
    population_size=False,
    # Survey preparation.
    to_cat=to_cat,
    drop_col_survey=drop_col_survey,
    # Execution settings.
    iterations=iterations,
    njobs=njobs,
    verbose=verbose,
)