# Productivity Commission 2018 analysis for the Rising Inequality? Commission Research Paper


# Load HILDA data - only load one of the three files at once. When you want to use a different file, start again from the '1_HILDA_setup' script.




##################################################################
# DATA






#########################
# CROSS-SECTIONAL DATA (2001 through 2016)


# Read in the HILDA cross-sectional data that has been organised using
# panelwhiz and store it as a tibble in `dat`.
# as_tibble() is used instead of tbl_df(), which is deprecated as of
# dplyr 1.0.0; nesting the read avoids keeping a temporary copy around.
dat <- as_tibble(
  read.dta13(here("HILDA", "panelwhiz_workdir", "hilda", "proj", "cross_0116", "cross_0116-long.dta"))
)






##########################
# LONGITUDINAL DATA   (2001 through 2016)


# Read in the HILDA longitudinal data that has been organised using
# panelwhiz and store it as a tibble in `dat`.
# as_tibble() is used instead of tbl_df(), which is deprecated as of
# dplyr 1.0.0; nesting the read avoids keeping a temporary copy around.
dat <- as_tibble(
  read.dta13(here("HILDA", "panelwhiz_workdir", "hilda", "proj", "long_01thru16", "long_01thru16-long.dta"))
)







##########################
# SHORT LONGITUDINAL DATA   (2013 through 2016)


# Read in the HILDA short longitudinal data that has been organised using
# panelwhiz and store it as a tibble in `dat`.
# as_tibble() is used instead of tbl_df(), which is deprecated as of
# dplyr 1.0.0; nesting the read avoids keeping a temporary copy around.
dat <- as_tibble(
  read.dta13(here("HILDA", "panelwhiz_workdir", "hilda", "proj", "long_13thru16", "long_13thru16-long.dta"))
)






########################
# RESTRICTED FILE - HOUSEHOLD DISPOSABLE INCOME DATA

# This code is used to extract household disposable income values from the restricted file (not top coded or bottom coded).
# You need to have access to the restricted file.
# It takes quite a long time to run. You only need to run it once; it then saves the abbreviated dataframe as an R object, which is called in the '3_variables_clean' R script.
# It works with both the longitudinal and cross sectional data files.


# Lookup table used as the index for the extraction loop: one row per survey
# year, holding the year, the letter prefix used in that year's file name,
# and the row number.
year_prefix <- data.frame(
  Year = 2001:2016,
  Prefix = c("a", "b", "c", "d", "e", "f", "g", "h",
             "i", "j", "k", "l", "m", "n", "o", "p"),
  Number = 1:16,
  stringsAsFactors = FALSE  # keep Prefix as character on R < 4.0 as well
)

# Pull out the household disposable income variables for each year from the
# restricted files, stack them into one data frame and save it as an R object.

# enter location of restricted files here
restricted_dir <- "H:\\Longitudinal Surveys\\Hilda R16 restricted"

# Collect each year's extract in a preallocated list and bind once at the end,
# instead of re-copying the growing result on every iteration.
yearly_disp_inc <- vector("list", nrow(year_prefix))
for (i in year_prefix$Number) {
  df <- read.dta13(
    paste0(restricted_dir, "\\Stata 160u\\Combined_", year_prefix$Prefix[i], "160u.dta")
  )
  # select variables we want to keep
  df <- select(df, xwaveid, ends_with("hifdip"), ends_with("hifdin"))
  # The positional rename below is only valid when exactly one column matched
  # each suffix, in this order; stop rather than silently mislabel columns.
  # tolower() mirrors the case-insensitive matching of ends_with().
  stopifnot(
    ncol(df) == 3L,
    endsWith(tolower(names(df)[2]), "hifdip"),
    endsWith(tolower(names(df)[3]), "hifdin")
  )
  names(df)[2:3] <- c("hifdip_res", "hifdin_res")
  df$year <- year_prefix$Year[i]
  yearly_disp_inc[[i]] <- df
}
HILDA_disp_inc_res <- bind_rows(yearly_disp_inc)

# save as an R object
saveRDS(HILDA_disp_inc_res, file = paste0(restricted_dir, "\\hh_disp_inc_res.rds"))