# Productivity Commission 2018 analysis for the Rising Inequality? Commission
# Research Paper
#
# Load HILDA data - only load one of the three files at once. When you want to
# use a different file, start again from the '1_HILDA_setup' script.
#
# Every section below assigns its result to `dat`, so running more than one
# section leaves only the most recently loaded file in `dat`.

##################################################################
# DATA

#########################
# CROSS-SECTIONAL DATA (2001 through 2016)

# read in hilda data that has been organised using panelwhiz and convert it
# straight to a tibble (as_tibble() replaces the deprecated tbl_df(); nesting
# the read means no temporary data frame is left behind to remove)
dat <- as_tibble(
  read.dta13(
    here("HILDA", "panelwhiz_workdir", "hilda", "proj",
         "cross_0116", "cross_0116-long.dta")
  )
)

##########################
# LONGITUDINAL DATA (2001 through 2016)

# read in hilda data that has been organised using panelwhiz and convert it
# straight to a tibble
dat <- as_tibble(
  read.dta13(
    here("HILDA", "panelwhiz_workdir", "hilda", "proj",
         "long_01thru16", "long_01thru16-long.dta")
  )
)

##########################
# SHORT LONGITUDINAL DATA (2013 through 2016)

# read in hilda data that has been organised using panelwhiz and convert it
# straight to a tibble
dat <- as_tibble(
  read.dta13(
    here("HILDA", "panelwhiz_workdir", "hilda", "proj",
         "long_13thru16", "long_13thru16-long.dta")
  )
)

########################
# RESTRICTED FILE - HOUSEHOLD DISPOSABLE INCOME DATA

# This code is used to extract household disposable income values from the
# restricted file (not top coded or bottom coded).
# You need to have access to the restricted file.
# It takes quite a long time to run. You only need to run it once; it then
# saves the abbreviated dataframe as an R object, which is called in the
# '3_variables_clean' R script.
# It works with both the longitudinal and cross-sectional data files.
# Extract the household disposable income variables from each wave of the
# restricted HILDA release, stack them into one long data frame with a `year`
# column, and save the result as an R object.

# enter location of restricted files here
restricted_dir <- "H:\\Longitudinal Surveys\\Hilda R16 restricted"
stata_dir <- paste0(restricted_dir, "\\Stata 160u")

# set up dataframe with years, prefixes, numbers to use as index when reading
# the wave files (one row per wave: 2001 = "a" = 1, ..., 2016 = "p" = 16)
year_prefix <- data.frame(
  Year = 2001:2016,
  Prefix = letters[1:16],
  Number = 1:16,
  stringsAsFactors = FALSE
)

# pull out household disposable income variables for each year
# NOTE(review): read.dta13() loads the whole wave file before the unwanted
# columns are dropped; if the installed readstata13 version supports
# `select.cols`, reading only the needed columns may be faster - confirm.
wave_tables <- lapply(year_prefix$Number, function(i) {
  wave_file <- paste0(
    stata_dir, "\\Combined_", year_prefix$Prefix[i], "160u.dta"
  )
  wave <- read.dta13(wave_file)

  # select variables we want to keep
  wave <- select(wave, xwaveid, ends_with("hifdip"), ends_with("hifdin"))

  # the rename below is positional, so refuse to continue if the selection
  # did not yield exactly xwaveid plus two income columns
  if (ncol(wave) != 3L) {
    stop(
      "Expected xwaveid plus exactly two disposable income columns ",
      "(*hifdip, *hifdin) in ", wave_file, " but selected ", ncol(wave),
      " column(s).",
      call. = FALSE
    )
  }
  names(wave)[2:3] <- c("hifdip_res", "hifdin_res")

  wave$year <- year_prefix$Year[i]
  wave
})

# bind all waves once, rather than growing the result inside the loop
HILDA_disp_inc_res <- bind_rows(wave_tables)
rm(wave_tables)

# save as an R object
saveRDS(
  HILDA_disp_inc_res,
  file = paste0(restricted_dir, "\\hh_disp_inc_res.rds")
)