set.seed(10) #this data was pre-processed using python script data_path <- file.path("data", "helsinki", "before_simulation") ind <- read.csv(file.path(data_path, "ind.csv")) cons <- read.csv(file.path(data_path, "cons.csv")) colnames(cons) <- c('district',c(1:7),c('m','f','totals')) #remove district districts <- cons[1] cons <- cons[,-c(1)] ind$age <- as.character(ind$age) #flattering individual data cat_age <- model.matrix(~ ind$age-1) cat_gender <- model.matrix(~ ind$gender - 1)[,c(2,1)] ind_cat <- cbind(cat_age, cat_gender) ind_agg <- colSums(ind_cat) #create initial weight matrix weights <- matrix(data=1,nrow = nrow(ind), ncol = nrow(cons)) #apply ipf library(ipfp) cons <- apply(cons, 2, as.numeric) weights_maxit_2 <- weights # create a copy of the weights object for(i in 1:ncol(weights)){ weights_maxit_2[,i] <- ipfp(cons[i,], t(ind_cat), rep(1,nrow(ind)), maxit = 2) } n_dist <- nrow(districts) data <-ind data <- cbind(data,w=weights_maxit_2[,1]) data <- cbind(data,dist=1) for (i in 2:n_dist){ p <- ind p <- cbind(p,w=weights_maxit_2[,i]) p <- cbind(p,dist=i) data <- rbind(data,p) } #integerisation of weights int_trs <- function(x){ # For generalisation purpose, x becomes a vector xv <- as.vector(x) # allows trs to work on matrices xint <- floor(xv) # integer part of the weight r <- xv - xint # decimal part of the weight def <- round(sum(r)) # the deficit population # the weights be 'topped up' (+ 1 applied) topup <- sample(length(x), size = def, prob = r) xint[topup] <- xint[topup] + 1 dim(xint) <- dim(x) dimnames(xint) <- dimnames(x) xint } int_weights <- int_trs(data$w) data <- cbind(data,int_weights) data$X <- c(1:187664) #expansion int_expand_vector <- function(x){ index <- 1:length(x) rep(index, round(x)) } exp_indices <- int_expand_vector(int_weights) ind_full <- data[exp_indices,c( "age", "gender", "dist")] ind_full <- data.frame(ind_full) write.csv(ind_full, file.path(data_path,"indHelsinki.csv"), quote=FALSE)