Skip to content
Snippets Groups Projects
Select Git revision
  • master
1 result

network.py

Blame
  • helsinki_preprocess.py 3.61 KiB
    #!/usr/bin/env python
    # coding: utf-8
    
    # ### Sample data
    # Age groups:
    # 1. 0-18
    # 2. 18-25
    # 3. 25-35
    # 4. 35-45
    # 5. 45-55
    # 6. 55-65
    # 7. 65+
    # 
    #     
    
    # In[2]:
    
    
    import pandas as pd
    import numpy as np
    import os
    from os.path import join as joinpath
    
    data_path = joinpath("data", "helsinki", "before_simulation")
    sample = joinpath(data_path)
    household = "FI_2013h_EUSILC.csv"
    personal = "FI_2013p_EUSILC.csv"
    household_register = "FI_2013d_EUSILC.csv"
    personal_register = "FI_2013r_EUSILC.csv"
    
    #variables we need from files
    d_var = ["DB030","DB090"]#hhid, federal state, hh weight
    p_var = ["PB030","PL031"] #economic status
    r_var = ["RB030","RB090","RB050","RB080"] #person's id, person's gender, weights, year of birth
    
    #read data and merge 
    h = pd.read_csv(joinpath(sample, household))
    p = pd.read_csv(joinpath(sample, personal))[p_var]
    d = pd.read_csv(joinpath(sample, household_register))[d_var]
    r = pd.read_csv(joinpath(sample, personal_register))[r_var]
    r["hhid"] = (np.floor(r["RB030"]/100)).astype(int)
    r["RB080"] = 2022-r["RB080"]
    
    r = r.merge(d, left_on = "hhid", right_on = "DB030")
    r.to_csv(joinpath(data_path, "microdata_NL.csv"))
    
    # add grpup og age instead age
    age = r["RB080"]
    for i in range(0,18):
        age = age.replace(i,1)
    for i in range(18,25):
        age = age.replace(i,2)
    for i in range(25,35):
        age = age.replace(i,3)
    for i in range(35,45):
        age = age.replace(i,4)
    for i in range(45,55):
        age = age.replace(i,5)
    for i in range(55,65):
        age = age.replace(i,6)
    for i in range(65,120):
        age = age.replace(i,7)
    r["RB080"] = age
    
    ind = r[["RB090","RB080"]]
    ind = ind.rename(columns={"RB090":"gender","RB080":"age"})
    ind["gender"] = ind["gender"].replace({2:"f",1:"m"})
    ind.to_csv(os.sep.join([data_path, "ind.csv"]),sep=",")
    
    
    # ### Marginal data
    
    # In[117]:
    
    
    import pandas as pd
    import numpy as np
    
    cons = pd.read_csv(os.sep.join([data_path, "Hki_vaesto_taulu9a.csv"]))
    # gender = cons[3,]
    districts = ["1 Eteläinen","2 Läntinen","3 Keskinen","4 Pohjoinen","5 Koillinen","6 Kaakkoinen","7 Itäinen","8 Östersundom"]
    # pd.to_numeric(cons)
    # make age groups by merging columns:
    # cons[2:].groupby("Ikä, vuotta").sum()
    a = cons[2:]
    a = a.apply(pd.to_numeric)
    a = a.groupby("Ikä, vuotta").sum()
    aa = a.groupby((np.arange(len(a.columns)) // 2) + 1, axis=1).sum()
    aa = aa.T
    aa.reset_index(inplace=True)
    aa.drop("index",axis=1,inplace=True)
    
    gender = cons[1:2].drop("Ikä, vuotta",axis=1)
    gender = gender.T
    gender.reset_index(inplace=True)
    gender.drop("index",axis=1,inplace=True)
    df = pd.DataFrame(gender.values.reshape(-1, 2), columns = ['males', 'females'])
    
    cons = pd.concat([aa,df], axis=1,join = 'outer')
    cons.index = districts
    cons.males = cons.males.astype(int)
    cons.females = cons.females.astype(int)
    cons['total'] = cons.males + cons.females
    cons.to_csv(os.sep.join([data_path, "cons.csv"]),sep=",")
    
    
    # ### Ferry arrivals data
    
    # In[133]:
    
    
    import os
    import json
    import pandas as pd
    
    
    ferries_path = os.sep.join([data_path, "ferries.json"])
    ferries = open(ferries_path)
    data = json.load(ferries)
    row = []
    for i in range(0,len(data)):
        hour = data[i]['startDate'][12:13]
        minutes = data[i]['startDate'][14:16]
        time = int(hour)*60*60 + int(minutes)*60
        row.append([data[i]['terminal'],time,data[i]['passengerCount'],data[i]['departsFromHarbour'],data[i]['arrivesToHarbour'],data[i]['subCategory']])
    
    ferries_csv = pd.DataFrame(row,columns=["terminal","time (sec)","passCount","departsFrom","arrivesTo","category"])
    ferries_Jatkasaari  = ferries_csv.loc[ferries_csv['terminal'] == 'West Terminal 2']
    ferries_csv.to_csv(os.sep.join([data_path, 'ferries.csv']))
    ferries_Jatkasaari.to_csv(os.sep.join([data_path, 'ferriesJatkasaari.csv']))