Skip to content
Snippets Groups Projects
Select Git revision
  • main
1 result

bootstrap.css.map

Blame
  • messina_preprocess.py 3.24 KiB
    #!/usr/bin/env python
    # coding: utf-8
    
    # Age groups:
    # 1. 0-19
    # 2. 20-44
    # 3. 45-64
    # 4. 65+
    
    # In[7]:
    
    
    import pandas as pd
    import numpy as np
    import os
    
    
    data_path = os.sep.join(["data","messina", "before_simulation"])
    sample_data = os.sep.join([data_path])
    household = "IT_2013h_EUSILC.csv"
    personal = "IT_2013p_EUSILC.csv"
    household_register = "IT_2013d_EUSILC.csv"
    personal_register = "IT_2013r_EUSILC.csv"
    
    #variables we need from files
    d_var = ["DB030","DB040","DB090"]#hhid, federal state, hh weight
    p_var = ["PB030","PL031"] #economic status
    r_var = ["RB030","RB090","RB050","RB080"] #person's id, person's gender, weights, year of birth
    
    #read data and merge 
    h = pd.read_csv(os.sep.join([sample_data,household]))
    p = pd.read_csv(os.sep.join([sample_data,personal]))[p_var]
    d = pd.read_csv(os.sep.join([sample_data,household_register]))[d_var]
    r = pd.read_csv(os.sep.join([sample_data,personal_register]))[r_var]
    r["hhid"] = (np.floor(r["RB030"]/10)).astype(int)
    r["RB080"] = 2021-r["RB080"]
    
    r = r.merge(d, left_on = "hhid", right_on = "DB030")
    # r.to_csv(data_path+"microdata_ES.csv")
    #       
    # add grpup og age instead age
    age = r["RB080"]
    for i in range(0,19):
        age = age.replace(i,1)
    for i in range(20,24):
        age = age.replace(i,2)
    for i in range(25,44):
        age = age.replace(i,3)
    for i in range(45,64):
        age = age.replace(i,4)
    for i in range(65,120):
        age = age.replace(i,5)
    r["RB080"] = age
    
    ind = r[["RB090","RB080"]]
    ind = ind.rename(columns={"RB090":"gender","RB080":"age"})
    ind["gender"] = ind["gender"].replace({2:"f",1:"m"})
    ind.to_csv(os.sep.join([data_path,"ind.csv"]),sep=",")
    
    
    # Filter marginal data
    
    # In[110]:
    
    
    # Read json marginal data
    import json
    marginal_path = os.sep.join([data_path,'marginal_data.json'])
    f = open(marginal_path)
    marginal_data = json.load(f)
    
    header = list()
    for i in marginal_data["districts-summary"]["district-I"]["ages"]:
        header.append(i)
    header.append("m")
    header.append("f")
    data = pd.DataFrame(columns=header)
    
    
    for i in marginal_data["districts-summary"]:
        row=list()
        males = 0
        females = 0
        for j in marginal_data["districts-summary"][i]["ages"]:
            row.append(marginal_data["districts-summary"][i]["ages"][j]["M"]+marginal_data["districts-summary"][i]["ages"][j]["F"])
            males+=marginal_data["districts-summary"][i]["ages"][j]["F"]
            females+=marginal_data["districts-summary"][i]["ages"][j]["M"]
        row.append(males)
        row.append(females)
        a_series = pd.Series(row, index = data.columns)
        data = data.append(a_series,ignore_index=True)
    
    
    # Group age to age groups
    
    # In[122]:
    
    
    cons=data
    a1 = cons['0']+cons['1-4']+cons['5-9']+cons['10-14']+cons['15-19']
    a2 = cons['20-24']
    a3 = cons['25-29']+cons['30-34']+cons['35-39']+cons['40-44']
    a4 = cons['45-49']+cons['50-54']+cons['55-59']+cons['60-64']
    a5 = cons['70-74']+cons['75-79']+cons['80-84']+cons['85-89']+cons['90-94']+cons['95-99']+cons['>100']
    cons1 = pd.DataFrame()
    # cons1['district']  = cons['district']
    cons1['1'] = a1
    cons1['2'] = a2
    cons1['3'] = a3
    cons1['4'] = a4
    cons1['5'] = a5
    cons1['f'] = cons['f']
    cons1['m'] = cons['m']
    cons1['total']=cons1.m+cons1.f
    cons1.index=["1_Circoscrizione","2_Circoscrizione","3_Circoscrizione","4_Circoscrizione","5_Circoscrizione","6_Circoscrizione"]
    cons1.to_csv(os.sep.join([data_path,"cons.csv"]))