Skip to content
Snippets Groups Projects
Select Git revision
  • master
1 result

amsterdam_population.R

Blame
  • amsterdam_population.R 2.24 KiB
    set.seed(10)
    
    #this data was pre-processed using python script
    data_path <- file.path("data", "amsterdam", "before_simulation")
    ind <- read.csv(file.path(data_path, "ind.csv"))
    cons <- read.csv(file.path(data_path, "cons.csv"))
    
    colnames(cons) <- c('district',c(1:8),c('totals'))
    
    #remove district and totals
    totals <- cons["totals"]
    districts <- cons[1]
    districts <- as.data.frame(districts[-24,])
    cons <- cons[1:23,-c(1,dim(cons)[2])]
    
    #ind <- subset(ind,ind$age>9)
    
    ind$age <- as.character(ind$age)
    
    #flattering individual data
    cat_age <- model.matrix(~ ind$age-1)
    
    #don't have constraints for gender
    #cat_gender <- model.matrix(~ ind$gender - 1)[,c(2,1)]
    
    ind_cat <- cbind(cat_age)
    ind_agg <- colSums(ind_cat)
    
    rbind(cons[1,], ind_agg)
    
    #create initial weight matrix
    weights <- matrix(data=1,nrow = nrow(ind), ncol = nrow(cons))
    
    #remove age group 1 from cons since no individuals in the sample data are in that group
    cons <- cons[,-1]
    
    #apply ipf
    library(ipfp)
    cons <- apply(cons, 2, as.numeric)
    weights_maxit_2 <- weights # create a copy of the weights object
    for(i in 1:ncol(weights)){
      weights_maxit_2[,i] <- ipfp(cons[i,], t(ind_cat), rep(1,nrow(ind)), maxit = 2)
    }
    
    
    n_dist <- nrow(districts)
    data <-ind
    data <- cbind(data,w=weights_maxit_2[,1])
    data <- cbind(data,dist=1)
    for (i in 2:n_dist){
      p <- ind
      p <- cbind(p,w=weights_maxit_2[,i])
      p <- cbind(p,dist=i)
      data <- rbind(data,p)
    }
    
    
    
    #integerisation of weights 
    
    int_trs <- function(x){
      # For generalisation purpose, x becomes a vector
      xv <- as.vector(x) # allows trs to work on matrices
      xint <- floor(xv) # integer part of the weight
      r <- xv - xint # decimal part of the weight
      def <- round(sum(r)) # the deficit population
      # the weights be 'topped up' (+ 1 applied)
      topup <- sample(length(x), size = def, prob = r)
      xint[topup] <- xint[topup] + 1
      dim(xint) <- dim(x)
      dimnames(xint) <- dimnames(x)
      xint
    }
    
    int_weights <- int_trs(data$w)
    data <- cbind(data,int_weights)
    data$X <- c(1:514395)
    
    
    #expansion 
    int_expand_vector <- function(x){
      index <- 1:length(x)
      rep(index, round(x))
    }
    
    exp_indices <- int_expand_vector(int_weights)
    ind_full <- data[exp_indices,c( "age", "gender", "dist")]
    ind_full <- data.frame(ind_full)
    write.csv(ind_full, file.path(data_path, "ind_full.csv"), quote=FALSE)