Select Git revision
amsterdam_population.R
amsterdam_population.R 2.24 KiB
set.seed(10)
#this data was pre-processed using python script
data_path <- file.path("data", "amsterdam", "before_simulation")
ind <- read.csv(file.path(data_path, "ind.csv"))
cons <- read.csv(file.path(data_path, "cons.csv"))
colnames(cons) <- c('district',c(1:8),c('totals'))
#remove district and totals
totals <- cons["totals"]
districts <- cons[1]
districts <- as.data.frame(districts[-24,])
cons <- cons[1:23,-c(1,dim(cons)[2])]
#ind <- subset(ind,ind$age>9)
ind$age <- as.character(ind$age)
#flattering individual data
cat_age <- model.matrix(~ ind$age-1)
#don't have constraints for gender
#cat_gender <- model.matrix(~ ind$gender - 1)[,c(2,1)]
ind_cat <- cbind(cat_age)
ind_agg <- colSums(ind_cat)
rbind(cons[1,], ind_agg)
#create initial weight matrix
weights <- matrix(data=1,nrow = nrow(ind), ncol = nrow(cons))
#remove age group 1 from cons since no individuals in the sample data are in that group
cons <- cons[,-1]
#apply ipf
library(ipfp)
cons <- apply(cons, 2, as.numeric)
weights_maxit_2 <- weights # create a copy of the weights object
for(i in 1:ncol(weights)){
weights_maxit_2[,i] <- ipfp(cons[i,], t(ind_cat), rep(1,nrow(ind)), maxit = 2)
}
n_dist <- nrow(districts)
data <-ind
data <- cbind(data,w=weights_maxit_2[,1])
data <- cbind(data,dist=1)
for (i in 2:n_dist){
p <- ind
p <- cbind(p,w=weights_maxit_2[,i])
p <- cbind(p,dist=i)
data <- rbind(data,p)
}
#integerisation of weights
int_trs <- function(x){
# For generalisation purpose, x becomes a vector
xv <- as.vector(x) # allows trs to work on matrices
xint <- floor(xv) # integer part of the weight
r <- xv - xint # decimal part of the weight
def <- round(sum(r)) # the deficit population
# the weights be 'topped up' (+ 1 applied)
topup <- sample(length(x), size = def, prob = r)
xint[topup] <- xint[topup] + 1
dim(xint) <- dim(x)
dimnames(xint) <- dimnames(x)
xint
}
int_weights <- int_trs(data$w)
data <- cbind(data,int_weights)
data$X <- c(1:514395)
#expansion
int_expand_vector <- function(x){
index <- 1:length(x)
rep(index, round(x))
}
exp_indices <- int_expand_vector(int_weights)
ind_full <- data[exp_indices,c( "age", "gender", "dist")]
ind_full <- data.frame(ind_full)
write.csv(ind_full, file.path(data_path, "ind_full.csv"), quote=FALSE)