#!/usr/bin/env python
# coding: utf-8
"""Preprocess Bilbao input data: EU-SILC sample, census constraints, amenities, OD matrices.

EUROSTAT (EU-SILC) variable codes used below:

1. RB090 gender: 1 = male, 2 = female
2. RB080 age (stored as year of birth in the raw file)
3. PD060 regularly participate in a leisure activity:
   1 = Yes, 2 = No - cannot afford it, 3 = No - other reason
4. PL031 self-defined current economic status:
   1  Employee working full-time
   2  Employee working part-time
   3  Self-employed working full-time (including family worker)
   4  Self-employed working part-time (including family worker)
   5  Unemployed
   6  Pupil, student, further training, unpaid work experience
   7  In retirement or in early retirement or has given up business
   8  Permanently disabled or/and unfit to work
   9  In compulsory military or community service
   10 Fulfilling domestic tasks and care responsibilities
   11 Other inactive person
5. PE040 highest ISCED level attained:
   0 pre-primary, 1 primary, 2 lower secondary, 3 (upper) secondary,
   4 post-secondary non-tertiary, 5 first stage of tertiary,
   6 second stage of tertiary
"""

import pandas as pd
import numpy as np
from os.path import join as joinpath

data_path = joinpath("data", "bilbao", "before_simulation")
sample = data_path  # single-argument join was a no-op; sample data lives alongside the outputs

household = "ES_2013h_EUSILC.csv"
personal = "ES_2013p_EUSILC.csv"
household_register = "ES_2013d_EUSILC.csv"
personal_register = "ES_2013r_EUSILC.csv"

# Variables we need from each file.
d_var = ["DB030", "DB040", "DB090"]  # hhid, federal state, household weight
# person id, leisure-activity participation (PD060), economic status (PL031), ISCED level (PE040)
p_var = ["PB030", "PD060", "PL031", "PE040"]
# person id, gender, weight, year of birth
r_var = ["RB030", "RB090", "RB050", "RB080"]

# Read the four EU-SILC files (h and p are loaded but not merged yet — the
# personal merge is intentionally disabled below).
h = pd.read_csv(joinpath(sample, household))
p = pd.read_csv(joinpath(sample, personal))[p_var]
d = pd.read_csv(joinpath(sample, household_register))[d_var]
r = pd.read_csv(joinpath(sample, personal_register))[r_var]

# Personal ids encode the household id in all but the last digit.
r["hhid"] = np.floor(r["RB030"] / 10).astype(int)
# Convert year of birth to age relative to the 2021 reference year.
r["RB080"] = 2021 - r["RB080"]
r = r.merge(d, left_on="hhid", right_on="DB030")
# r = r.merge(p, left_on="RB030", right_on="PB030")

ind = r[["RB090", "RB080"]].rename(columns={"RB090": "gender", "RB080": "age"})
ind["gender"] = ind["gender"].replace({2: "f", 1: "m"})
ind.to_csv(joinpath(data_path, "ind.csv"), sep=",")

# ### Aggregated data
# Preprocess the census table to fit the IPF algorithm.

cons = pd.read_csv(
    joinpath(data_path, "numero_habitantes_distrito_barrio_edad_2020.csv"),
    sep=";",
)
# axis must be passed by keyword (positional axis was removed in pandas 2.0)
cons = cons.drop(cons.columns[[0, 3, 4]], axis=1)
cons = cons[1:46].groupby(cons.columns[1])[cons.columns[2:]].sum()

# Save the totals column before trimming the age-band columns.
totals = cons["TOTAL BARRIO"]
cons = cons.drop("TOTAL BARRIO", axis=1)
cons = cons.drop(cons.columns[0:20], axis=1)
cons = cons.drop(cons.columns[160:224], axis=1)

# Columns alternate male/female per age band: even positions male, odd female.
males = cons[cons.columns[::2]].sum(axis=1)
females = cons[cons.columns[1::2]].sum(axis=1)

# Collapse each adjacent male/female column pair into one age-band total.
# (groupby(axis=1) is deprecated/removed in modern pandas — group the transpose.)
pair_labels = (np.arange(len(cons.columns)) // 2) + 1
cons = cons.T.groupby(pair_labels).sum().T

cons["f"] = females
cons["m"] = males
cons["totals"] = totals
cons.columns = cons.columns.astype(str)

# Merge the first two rows into a single ABANDO row and append it at the end.
# (DataFrame.append was removed in pandas 2.0 — use pd.concat instead.)
sum_cons = cons.iloc[0:2].sum(axis=0).rename("ABANDO")
cons = pd.concat([cons.iloc[2:], sum_cons.to_frame().T])
cons.to_csv(joinpath(data_path, "cons.csv"), sep=",")

# ### Filter amenities
# OSM amenity tags grouped by category.
sustence = ["bar", "cafe", "biergarten", "fast_food", "food_court",
            "ice_cream", "pub", "restaurant"]  # duplicate "cafe" removed
education = ["college", "driving_school", "kindergarten", "language_school",
             "library", "toy_library", "music_school", "school", "university"]
financial = ["bank"]
healthcare = ["clinic", "dentist", "doctors", "hospital", "nursing_home",
              "pharmacy", "social_facility", "veterinary"]
entertainment = ["arts_center", "casino", "cinema", "community_centre",
                 "conference_centre", "events_venue", "nightclub",
                 "planetarium", "social_centre", "studio", "theatre"]
public_services = ["courthouse", "embassy", "fire_station", "police",
                   "post_box", "post_depot", "post_office", "prison",
                   "ranger_station", "townhall"]
others = ["marketplace"]

# Placeholder record for an extracted amenity. Renamed: the original code
# reused the name "sustence" and silently clobbered the amenity list above.
amenity_record = {
    "anemity": "",
    "lat": "",
    "long": "",
}

# Read the OSM export; the handle is closed via the context manager instead of
# being left open as before.
with open(joinpath(data_path, "amenities_bilbao.osm")) as osm_file:
    osm = osm_file.read()
# TODO: extract amenity locations from the OSM data into amenity_record rows.

# ### Preprocess OD matrices
# Aggregate zone-level origin-destination counts up to district level.

odm = pd.read_csv(joinpath(data_path, "ODM_bilbao1.csv"))

# Zone -> district assignment.
DEUSTO = ["D1 NORTE", "D1 SUR"]
URIBARRI = ["CASTANOS", "D2 ALTO", "RIA ESTE", "RIA OESTE",
            "MB L3 (METRO BILBAO LINEA 3)"]
OTXARKOAGA_TXURDINAGA = ["OTXARKOAGA", "TXURDINAGA"]
BEGONA = ["D4 SUR"]
IBAIONDO = ["CASCO VIEJO", "D5 ESTE", "D5 OESTE"]
ABANDO = ["CENTRO", "HOSPITAL", "ACCESOS V (ACCESOS VIARIOS)", "INTERMODAL"]
ERREKALDE = ["D7 NORTE", "D7 SUR"]
BASURT_ZORROTZA = ["BASURTO", "ZORROZA"]

districts = ["ABANDO", "BASURT_ZORROTZA", "BEGONA", "DEUSTO", "ERREKALDE",
             "IBAIONDO", "OTXARKOAGA_TXURDINAGA", "URIBARRI"]

# Build a single zone->district mapping and replace in one pass instead of
# one full-frame replace() call per zone.
zone_to_district = {}
for district, zones in [
    ("DEUSTO", DEUSTO),
    ("URIBARRI", URIBARRI),
    ("OTXARKOAGA_TXURDINAGA", OTXARKOAGA_TXURDINAGA),
    ("BEGONA", BEGONA),
    ("IBAIONDO", IBAIONDO),
    ("ABANDO", ABANDO),
    ("ERREKALDE", ERREKALDE),
    ("BASURT_ZORROTZA", BASURT_ZORROTZA),
]:
    for zone in zones:
        zone_to_district[zone] = district

odm = odm.replace(zone_to_district)
odm.to_csv(joinpath(data_path, "ODM_bilbao_districtLevel.csv"))

# Merge rows that now share a district pair to reduce the number of rows.
odm = odm.groupby(["From location", "To location"]).sum()
odm.to_csv(joinpath(data_path, "ODM_bilbao_districtLevel_reduced.csv"))