#Productivity Commission 2018 analysis for the Rising Inequality? Commission Research Paper #Script for loading HES data #load packages library(here) library(dplyr) library(haven) library(readxl) library(Hmisc) library(tidyr) library(reldist) library(questionr) library(ggplot2) library(scales) library(stringr) library(data.table) library(splitstackshape) library(ergm) library(tfplot) library(DataCombine) library(spatstat) library(quantreg) library(PerformanceAnalytics) library(corrplot) library(measurements) library(devEMF) library(zoo) library(ggrepel) library(gridExtra) library(forcats) # assign functions to packages wtd.mean <- Hmisc::wtd.mean wtd.quantile <- Hmisc::wtd.quantile wtd.table <- questionr::wtd.table wtd.median <- ergm::wtd.median lag <- dplyr::lag select <- dplyr::select gini <- reldist::gini ########################################################################### ## LOAD DATAFRAMES ########################################################################### #load each of the HES person, household and expenditure files by year - requires access to HES CURFs for each year data_years<-c("88","93","98","03","10","15") for (year in data_years) { assign(paste0("HESP",year), read_dta(here("HES","Data",paste0("HES", year,"BP.dta")))) assign(paste0("HESH",year), read_dta(here("HES","Data",paste0("HES", year,"BH.dta")))) assign(paste0("HESE",year), read_dta(here("HES","Data",paste0("HES", year,"BX.dta")))) } #Correct inconsistent data classes across years (household files) HESH98<-HESH98 %>% mutate(ABSFID=as.character(ABSFID)) HESH98<-HESH98 %>% mutate(ABSIID=as.character(ABSIID)) HESH98<-HESH98 %>% mutate(ABSPID=as.character(ABSPID)) HESH03<-HESH03 %>% mutate(ABSFID=as.character(ABSFID)) HESH03<-HESH03 %>% mutate(ABSIID=as.character(ABSIID)) HESH03<-HESH03 %>% mutate(ABSPID=as.character(ABSPID)) HESH03<-HESH03 %>% mutate(ABSEID=as.character(ABSEID)) HESH10<-HESH10 %>% mutate(ABSEID=as.character(ABSEID)) HESH10<-HESH10 %>% mutate(ABSLID=as.numeric(ABSLID)) #Correct inconsistent data classes across years (person files) HESP98<-HESP98 %>% mutate(ABSFID=as.character(ABSFID)) HESP98<-HESP98 %>% mutate(ABSIID=as.character(ABSIID)) HESP98<-HESP98 %>% mutate(ABSPID=as.character(ABSPID)) HESP03<-HESP03 %>% mutate(ABSFID=as.character(ABSFID)) HESP03<-HESP03 %>% mutate(ABSIID=as.character(ABSIID)) HESP03<-HESP03 %>% mutate(ABSPID=as.character(ABSPID)) HESP10<-HESP10 %>% mutate(ABSEID=as.character(ABSEID)) HESP10<-HESP10 %>% mutate(ABSLID=as.numeric(ABSLID)) HESP10<-HESP10 %>% mutate(LEVELPS=as.character(LEVELPS)) #Correct inconsistent data classes across years (expenditure files) HESE93<-HESE93 %>% mutate(ABSFID=as.character(ABSFID)) HESE93<-HESE93 %>% mutate(ABSIID=as.character(ABSIID)) HESE93<-HESE93 %>% mutate(ABSPID=as.character(ABSPID)) HESE93<-HESE93 %>% mutate(ABSEID=as.character(ABSEID)) HESE98<-HESE98 %>% mutate(ABSFID=as.character(ABSFID)) HESE98<-HESE98 %>% mutate(ABSIID=as.character(ABSIID)) HESE98<-HESE98 %>% mutate(ABSPID=as.character(ABSPID)) HESE03<-HESE03 %>% mutate(ABSFID=as.character(ABSFID)) HESE03<-HESE03 %>% mutate(ABSIID=as.character(ABSIID)) HESE03<-HESE03 %>% mutate(ABSPID=as.character(ABSPID)) HESE03<-HESE03 %>% mutate(ABSEID=as.character(ABSEID)) HESE03<-HESE03 %>% mutate(ABSLID=as.character(ABSLID)) HESE10<-HESE10 %>% mutate(ABSEID=as.character(ABSEID)) HESE10<-HESE10 %>% mutate(LEVELEXP=as.character(LEVELEXP)) #Create variable in HESH93 for ABSHID (otherwise missing) HESH93 <- HESH93 %>% mutate(ABSHID=paste0("HES93B",str_pad(HESH93$idhh,6,pad="0"))) #Recreate variable in HESH03 and HESE03 for ABSHID (otherwise does not match ABSHID in HESP03) HESH03$ABSHID <- sub("B1","10",HESH03$ABSHID) HESE03$ABSHID <- sub("B1","10",HESE03$ABSHID) #bind all person files together HESP<-bind_rows(HESP88,HESP93,HESP98,HESP03,HESP10,HESP15, .id = "year_id") #bind all household files together HESH<-bind_rows(HESH88,HESH93,HESH98,HESH03,HESH10,HESH15, .id = "year_id") #bind all expenditure files together HESE<-bind_rows(HESE88,HESE93,HESE98,HESE03,HESE10,HESE15, .id = "year_id") #remove individual year HES dataframes HES_year_filenames<-c(paste0("HESP",data_years),paste0("HESH",data_years),paste0("HESE",data_years)) rm(list=HES_year_filenames) ########################################################################### ## DROP OUTLIER OBSERVATIONS ########################################################################### #Drop 10 households from 2016 HESH, HESP and HESE with very high veterans pension transfers ($6345 per week) (ISERVCP) HESH <- HESH %>% filter(ABSHID != "HES15B8997172", ABSHID != "HES15B8982779", ABSHID != "HES15B8982983", ABSHID != "HES15B8984068", ABSHID != "HES15B8986064", ABSHID != "HES15B8987209", ABSHID != "HES15B8992034", ABSHID != "HES15B8993124", ABSHID != "HES15B8995968", ABSHID != "HES15B8996716") HESP <- HESP %>% filter(ABSHID != "HES15B8997172", ABSHID != "HES15B8982779", ABSHID != "HES15B8982983", ABSHID != "HES15B8984068", ABSHID != "HES15B8986064", ABSHID != "HES15B8987209", ABSHID != "HES15B8992034", ABSHID != "HES15B8993124", ABSHID != "HES15B8995968", ABSHID != "HES15B8996716") HESE <- HESE %>% filter(ABSHID != "HES15B8997172", ABSHID != "HES15B8982779", ABSHID != "HES15B8982983", ABSHID != "HES15B8984068", ABSHID != "HES15B8986064", ABSHID != "HES15B8987209", ABSHID != "HES15B8992034", ABSHID != "HES15B8993124", ABSHID != "HES15B8995968", ABSHID != "HES15B8996716")