#!/usr/bin/env python # coding: utf-8 # Age groups: # 1. 0-19 # 2. 20-44 # 3. 45-64 # 4. 65+ # In[7]: import pandas as pd import numpy as np import os data_path = os.sep.join(["data","messina", "before_simulation"]) sample_data = os.sep.join([data_path]) household = "IT_2013h_EUSILC.csv" personal = "IT_2013p_EUSILC.csv" household_register = "IT_2013d_EUSILC.csv" personal_register = "IT_2013r_EUSILC.csv" #variables we need from files d_var = ["DB030","DB040","DB090"]#hhid, federal state, hh weight p_var = ["PB030","PL031"] #economic status r_var = ["RB030","RB090","RB050","RB080"] #person's id, person's gender, weights, year of birth #read data and merge h = pd.read_csv(os.sep.join([sample_data,household])) p = pd.read_csv(os.sep.join([sample_data,personal]))[p_var] d = pd.read_csv(os.sep.join([sample_data,household_register]))[d_var] r = pd.read_csv(os.sep.join([sample_data,personal_register]))[r_var] r["hhid"] = (np.floor(r["RB030"]/10)).astype(int) r["RB080"] = 2021-r["RB080"] r = r.merge(d, left_on = "hhid", right_on = "DB030") # r.to_csv(data_path+"microdata_ES.csv") # # add grpup og age instead age age = r["RB080"] for i in range(0,19): age = age.replace(i,1) for i in range(20,24): age = age.replace(i,2) for i in range(25,44): age = age.replace(i,3) for i in range(45,64): age = age.replace(i,4) for i in range(65,120): age = age.replace(i,5) r["RB080"] = age ind = r[["RB090","RB080"]] ind = ind.rename(columns={"RB090":"gender","RB080":"age"}) ind["gender"] = ind["gender"].replace({2:"f",1:"m"}) ind.to_csv(os.sep.join([data_path,"ind.csv"]),sep=",") # Filter marginal data # In[110]: # Read json marginal data import json marginal_path = os.sep.join([data_path,'marginal_data.json']) f = open(marginal_path) marginal_data = json.load(f) header = list() for i in marginal_data["districts-summary"]["district-I"]["ages"]: header.append(i) header.append("m") header.append("f") data = pd.DataFrame(columns=header) for i in marginal_data["districts-summary"]: row=list() males = 0 females = 0 for j in marginal_data["districts-summary"][i]["ages"]: row.append(marginal_data["districts-summary"][i]["ages"][j]["M"]+marginal_data["districts-summary"][i]["ages"][j]["F"]) males+=marginal_data["districts-summary"][i]["ages"][j]["F"] females+=marginal_data["districts-summary"][i]["ages"][j]["M"] row.append(males) row.append(females) a_series = pd.Series(row, index = data.columns) data = data.append(a_series,ignore_index=True) # Group age to age groups # In[122]: cons=data a1 = cons['0']+cons['1-4']+cons['5-9']+cons['10-14']+cons['15-19'] a2 = cons['20-24'] a3 = cons['25-29']+cons['30-34']+cons['35-39']+cons['40-44'] a4 = cons['45-49']+cons['50-54']+cons['55-59']+cons['60-64'] a5 = cons['70-74']+cons['75-79']+cons['80-84']+cons['85-89']+cons['90-94']+cons['95-99']+cons['>100'] cons1 = pd.DataFrame() # cons1['district'] = cons['district'] cons1['1'] = a1 cons1['2'] = a2 cons1['3'] = a3 cons1['4'] = a4 cons1['5'] = a5 cons1['f'] = cons['f'] cons1['m'] = cons['m'] cons1['total']=cons1.m+cons1.f cons1.index=["1_Circoscrizione","2_Circoscrizione","3_Circoscrizione","4_Circoscrizione","5_Circoscrizione","6_Circoscrizione"] cons1.to_csv(os.sep.join([data_path,"cons.csv"]))