amsterdam_population.R

set.seed(10)

#this data was pre-processed using python script
data_path <- file.path("data", "amsterdam", "before_simulation")
ind <- read.csv(file.path(data_path, "ind.csv"))
cons <- read.csv(file.path(data_path, "cons.csv"))

colnames(cons) <- c('district',c(1:8),c('totals'))

#remove district and totals
totals <- cons["totals"]
districts <- cons[1]
districts <- as.data.frame(districts[-24,])
cons <- cons[1:23,-c(1,dim(cons)[2])]

#ind <- subset(ind,ind$age>9)

ind$age <- as.character(ind$age)

#flattering individual data
cat_age <- model.matrix(~ ind$age-1)

#don't have constraints for gender
#cat_gender <- model.matrix(~ ind$gender - 1)[,c(2,1)]

ind_cat <- cbind(cat_age)
ind_agg <- colSums(ind_cat)

rbind(cons[1,], ind_agg)

#create initial weight matrix
weights <- matrix(data=1,nrow = nrow(ind), ncol = nrow(cons))

#remove age group 1 from cons since no individuals in the sample data are in that group
cons <- cons[,-1]

#apply ipf
library(ipfp)
cons <- apply(cons, 2, as.numeric)
weights_maxit_2 <- weights # create a copy of the weights object
for(i in 1:ncol(weights)){
  weights_maxit_2[,i] <- ipfp(cons[i,], t(ind_cat), rep(1,nrow(ind)), maxit = 2)
}


n_dist <- nrow(districts)
data <-ind
data <- cbind(data,w=weights_maxit_2[,1])
data <- cbind(data,dist=1)
for (i in 2:n_dist){
  p <- ind
  p <- cbind(p,w=weights_maxit_2[,i])
  p <- cbind(p,dist=i)
  data <- rbind(data,p)
}


#integerisation of weights 

int_trs <- function(x){
  # For generalisation purpose, x becomes a vector
  xv <- as.vector(x) # allows trs to work on matrices
  xint <- floor(xv) # integer part of the weight
  r <- xv - xint # decimal part of the weight
  def <- round(sum(r)) # the deficit population
  # the weights be 'topped up' (+ 1 applied)
  topup <- sample(length(x), size = def, prob = r)
  xint[topup] <- xint[topup] + 1
  dim(xint) <- dim(x)
  dimnames(xint) <- dimnames(x)
  xint
}

int_weights <- int_trs(data$w)
data <- cbind(data,int_weights)
data$X <- c(1:514395)


#expansion 
int_expand_vector <- function(x){
  index <- 1:length(x)
  rep(index, round(x))
}

exp_indices <- int_expand_vector(int_weights)
ind_full <- data[exp_indices,c( "age", "gender", "dist")]
ind_full <- data.frame(ind_full)
write.csv(ind_full, file.path(data_path, "ind_full.csv"), quote=FALSE)