# Productivity Commission 2018 analysis for the "Rising Inequality?" Commission Research Paper
# HILDA variable construction - works with the cross-sectional, longitudinal and
# short longitudinal data files.
# NOTE(review): this script reads and modifies a data frame `dat` that is not
# created here, and calls read_excel(), here(), as_labeller() and dplyr verbs
# without loading their packages - presumably an earlier script does both.

#########################################################
# FUNCTIONS
# Pin the preferred implementation wherever more than one package exports the
# same name (e.g. wtd.mean from Hmisc, so it does not clash with the version in reldist).
wtd.mean     <- Hmisc::wtd.mean
wtd.quantile <- Hmisc::wtd.quantile
wtd.var      <- Hmisc::wtd.var
wtd.table    <- questionr::wtd.table
gini         <- reldist::gini
select       <- dplyr::select
lag          <- dplyr::lag
distinct     <- dplyr::distinct

#######################################################
# CPI - read in and attach to the data frame
# CPI multiplier used to convert to 2016-17 dollars
# (see the CPI calculations spreadsheet in the data folder).
CPI <- read_excel(here("CPI.xlsx"), sheet = "Deflators")
# Give each observation the deflator(s) for its year.
# NOTE(review): later code uses columns `deflator` and `deflator_lag`;
# presumably both come from this sheet - confirm.
dat <- dat %>% left_join(CPI, by = "year")
# The lookup table is no longer needed.
rm(CPI)

#####################################
# LABELS
#####################################
# Financial year labels - accounting for lag
inc_fin_year_labels <- as_labeller(c(
  "1994" = "1994-95", "1996" = "1996-97", "1997" = "1996-97", "1998" = "1997-98",
  "2000" = "2000-01", "2001" = "2000-01", "2002" = "2001-02", "2003" = "2002-03",
  "2004" = "2003-04", "2005" = "2004-05", "2006" = "2005-06", "2007" = "2006-07",
  "2008" = "2007-08", "2009" = "2008-09", "2010" = "2009-10", "2011" = "2010-11",
  "2012" = "2011-12", "2013" = "2012-13", "2014" = "2013-14", "2015" = "2014-15",
  "2016" = "2015-16"
))

wealth_fin_year_labels <- as_labeller(c(
  "1988" = "1988-89", "1990" = "1990-91", "1991" = "1991-92", "1992" = "1992-93",
  "1993" = "1993-94", "1994" = "1994-95", "1996" = "1996-97", "1997" = "1997-98",
  "1998" = "1998-99", "2000" = "2000-01", "2001" = "2001-02", "2002" = "2002-03",
  "2003" = "2003-04", "2004" = "2004-05", "2005" = "2005-06", "2006" = "2006-07",
  "2007" = "2007-08", "2008" = "2008-09", "2009" = "2009-10", "2010" = "2010-11",
  "2011" = "2011-12", "2012" = "2012-13", "2013" = "2013-14", "2014" = "2014-15",
  "2015" = "2015-16", "2016" = "2016-17"
))

# Abbreviated version; some years are deliberately mapped to an empty label.
short_wealth_fin_year_labels <- as_labeller(c(
  "1988" = "'88-89", "1993" = "'93-94", "1998" = "'98-99", "2001" = "'01-02",
  "2002" = "'02-03", "2003" = "'03-04", "2004" = "",       "2005" = "'05-06",
  "2006" = "'06-07", "2007" = "'07-08", "2008" = "",       "2009" = "'09-10",
  "2010" = "'10-11", "2011" = "'11-12", "2012" = "",       "2013" = "'13-14",
  "2014" = "'14-15", "2015" = "'15-16", "2016" = "'16-17"
))

# Decile labels; the same labeller is used for income and wealth deciles.
inc_dec_labels <- as_labeller(c(
  "1" = "Bottom", "2" = "2", "3" = "3", "4" = "4", "5" = "5",
  "6" = "6", "7" = "7", "8" = "8", "9" = "9", "10" = "Top"
))
wealth_dec_labels <- inc_dec_labels

son_position_labels <- as_labeller(c(
  "bottom_decile"   = "Bottom decile",
  "bottom2_deciles" = "Bottom two deciles",
  "top_5_deciles"   = "Top half of\nthe distribution",
  "top2_deciles"    = "Top two deciles",
  "xTop_decile"     = "Top decile"
))

father_percentile_labels <- as_labeller(c(
  "_5th" = "5th", "20th" = "20th", "50th" = "50th", "80th" = "80th", "95th" = "95th"
))

# Name vectors used to iterate over income components and age-group columns.
# NOTE(review): "eq_inc_tax" is not created in the visible part of this
# script (the tax variable built further down is called eq_tax) - confirm.
income_types    <- c("eq_lab_inc", "eq_cap_inc", "eq_trans_inc", "eq_inc_tax")
age_groups      <- c("under15s", "15to24", "25to34", "35to44", "45to54", "55to64", "65plus")
HESP_age_groups <- c("15to24", "25to34", "35to44", "45to54", "55to64", "65plus")

period_labels <- as_labeller(c(
  "2010" = "'03-04 to '09-10",
  "2016" = "'09-10 to '15-16"
))
HESP_period_labels <- as_labeller(c(
  "2010" = "'03-04 to '09-10",
  "2016" = "'09-10 to '15-16"
))

income_labels <- as_labeller(c(
  eq_disp_inc  = "Disposable income",
  eq_priv_inc  = "Private income",
  eq_gross_inc = "Gross income"
))

consumption_gini_labels <- as_labeller(c(
  eq_disp_inc       = "Disposable income",
  eq_cons           = "Final consumption",
  eq_cons_exp       = "Consumption expenditure",
  eq_cons_no_inkind = "Private consumption"
))

consumption_labels <- as_labeller(c(
  eq_cons     = "Private consumption",
  eq_disp_inc = "Disposable income"
))

inkind_type_labels <- as_labeller(c(
  eq_inkind_health    = "Health",
  eq_inkind_educ      = "Education",
  eq_inkind_welfare   = "Welfare",
  eq_inkind_childcare = "Childcare",
  eq_inkind_govt_rent = "Government housing"
))

marginal_effect_labels <- as_labeller(c(
  gross_less_cap   = "Capital income",
  gross_less_lab   = "Labour income",
  gross_less_trans = "Transfer income",
  disp_less_tax    = "Income tax"
))

income_type_labels <- as_labeller(c(
  "eq_lab_inc"   = "Labour income",
  "eq_cap_inc"   = "Capital income",
  "eq_trans_inc" = "Transfer income",
  "eq_inc_tax"   = "Income tax"
))

# NOTE(review): several keys here ("eq_business", "eq_financial_equity",
# "eq_vehicle_equity", "eq_personal_equity") differ from the wealth variables
# built later in this script (eq_business_equity, eq_financial_wealth,
# eq_vehicles, eq_personal_wealth) - confirm which data set these label.
wealth_type_labels <- as_labeller(c(
  "eq_home_equity"           = "Owner occupied\nhousing",
  "eq_other_property_equity" = "Other property",
  "eq_super"                 = "Super",
  "eq_business"              = "Business",
  "eq_financial_equity"      = "Financial",
  "eq_vehicle_equity"        = "Vehicle",
  "eq_personal_equity"       = "Personal"
))

wealth_type_labels_consolidated <- as_labeller(c(
  "eq_home_equity" = "Owner occupied\nhousing",
  "eq_super"       = "Superannuation",
  "eq_other"       = "Other"
))

household_type_labels <- as_labeller(c(
  "family_employed_1"      = "Family, 1 income",
  "family_employed_2"      = "Family, 2+ incomes",
  "family_unemployed"      = "Family, no paid work",
  "retiree_other"          = "Retiree, no pension",
  "retiree_pension"        = "Retiree, receiving pension",
  "working_age_employed"   = "Working age, employed",
  "working_age_unemployed" = "Working age, no paid work"
))

household_type_labels_poverty <- as_labeller(c(
  "family_employed"        = "Family,\n1+ employed",
  "family_unemployed"      = "Family,\nno paid work",
  "retiree"                = "Retiree",
  "working_age_employed"   = "Working age,\nemployed",
  "working_age_unemployed" = "Working age,\nno paid work"
))

household_type_labels_spaced <- as_labeller(c(
  "family_employed_1"      = "Family,\n1\nincome",
  "family_employed_2"      = "Family,\n2+\nincomes",
  "family_unemployed"      = "Family,\nno\npaid work",
  "retiree_other"          = "Retiree,\nno\npension",
  "retiree_pension"        = "Retiree,\nreceiving\npension",
  "working_age_employed"   = "Working\nage,\nemployed",
  "working_age_unemployed" = "Working\nage,\nno\npaid work"
))

age_group_labels <- as_labeller(c(
  "under15s" = "Under 15",
  "15to24"   = "15 to 24",
  "25to34"   = "25 to 34",
  "35to44"   = "35 to 44",
  "45to54"   = "45 to 54",
  "55to64"   = "55 to 64",
  "65plus"   = "65+"
))

HESP_age_group_labels <- as_labeller(c(
  "15to24" = "15 to 24",
  "25to34" = "25 to 34",
  "35to44" = "35 to 44",
  "45to54" = "45 to 54",
  "55to64" = "55 to 64",
  "65plus" = "65+"
))

# Replace every whitespace character in `x` with a line break, so that
# multi-word labels stack vertically. Extra arguments are accepted and ignored.
addline_format <- function(x, ...) {
  gsub('\\s', '\n', x)
}

spaced_income_type_labels <- as_labeller(addline_format(c(
  "eq_lab_inc"   = "Labour income",
  "eq_cap_inc"   = "Capital income",
  "eq_trans_inc" = "Transfer income",
  "eq_inc_tax"   = "Income tax"
)))

poverty_labels <- as_labeller(c(
  income                         = "Income",
  consumption                    = "Final consumption",
  financial_Headey_liquid_assets = "Financial",
  cons_no_in_kind                = "Private consumption"
))

########################################
# HES years - the 3 years in which HES overlaps with HILDA
HES_years <- c(2004, 2010, 2016)

#################
# Cleaner names for age, household id, household population weight,
# responding person weight, responding person flag and year of birth.
dat <- dat %>%
  mutate(
    age        = hgage,                   # age
    hh_id      = hhrhid,                  # household id
    hh_wt      = hhwth,                   # household/enumerated person weight
    hhwt       = hh_wt,                   # same weight, named to match HES
    rp_wt      = hhwtrp,                  # responding person weight
    birth_year = hgyob,                   # year of birth
    rp         = as.integer(hgint) - 11   # responding person (1 = yes)
  )

#####################################################
# EQUIVALENCE SCALES
# Number of children in the household (ages 0-14).
dat <- dat %>% mutate(hhchild = hh0_4 + hh5_9 + hh10_14)
# Drop the age-banded child counts now that they have been summed.
dat <- within(dat, rm(hh0_4, hh5_9, hh10_14))
dat <- dat %>%
  mutate(
    # OECD-modified scale: 1 for the first adult, 0.5 per further adult, 0.3 per child
    OECD_mod       = 1 + ((hhadult - 1) * (0.5)) + (hhchild * (0.3)),
    # square root scale: square root of household size
    square_root_eq = sqrt(hhadult + hhchild)
  )
#####################################################
# INCOME VARIABLES
# Regular HILDA income variables are used (not total income, which includes
# irregular income) to match the ABS, and because they are more consistent prior to 2012.
# See the Appendix in Wilkins (2014) 'Derived income variables in the HILDA survey'
# for an explanation, and
# http://www.abs.gov.au/ausstats/abs@.nsf/Lookup/by%20Subject/6503.0~2015-16~Main%20Features~Income~2
# We used the restricted (non-top-coded) version of HILDA - just for household disposable income.
# The restricted household disposable income values are pulled from a different file,
# not using PanelWhiz (the name of the script that builds that file is missing
# from the original comment).
# The code also works with the general (non-restricted) release.

##################################################
# DISPOSABLE INCOME (general release)
# Real household disposable regular income (adjusted for inflation to
# 2016-17 dollars) - using top coded values. Uncomment to use the general release.
# dat <- mutate(dat, hifdip_rl = hifdip*deflator)
# dat <- mutate(dat, hifdin_rl = hifdin*deflator)
#################################################

################################################# (skip this bit if using general release)
# DISPOSABLE INCOME (restricted release)
# Read the R data file holding the income values (not top coded or bottom coded).
HILDA_inc_res <- readRDS("H:\\Longitudinal Surveys\\Hilda R16 restricted\\hh_disp_inc_res.rds")

# Join onto the main data by person id and year, then build real household
# disposable regular income (adjusted for inflation to 2016-17 dollars) from
# the restricted-release values.
dat <- dat %>%
  left_join(HILDA_inc_res, by = c("xwaveid", "year")) %>%
  mutate(
    hifdip_rl = hifdip_res * deflator,
    hifdin_rl = hifdin_res * deflator
  )
###################################################
# From this point either the general release or restricted release disposable
# income variables will work the same.
# Household disposable regular income: positive component minus negative component.
dat <- dat %>%
  mutate(
    hh_disp_inc      = hifdip_rl - hifdin_rl,
    # equivalised (OECD-modified scale) total, positive part and negative part
    eq_disp_inc      = hh_disp_inc / OECD_mod,
    eq_disp_incp     = hifdip_rl / OECD_mod,
    eq_disp_incn     = hifdin_rl / OECD_mod,
    # equivalised with the SQUARE ROOT scale, for calculating ginis
    sqrt_eq_disp_inc = hh_disp_inc / square_root_eq
  )

# GROSS
# Real household gross regular income (adjusted for inflation to 2016-17
# dollars), summed (positive minus negative) and equivalised.
dat <- dat %>%
  mutate(
    hifefp_rl     = hifefp * deflator,
    hifefn_rl     = hifefn * deflator,
    hh_gross_inc  = hifefp_rl - hifefn_rl,
    eq_gross_inc  = hh_gross_inc / OECD_mod,
    eq_gross_incp = hifefp_rl / OECD_mod,
    eq_gross_incn = hifefn_rl / OECD_mod
  )

# MARKET (PRIVATE)
# Household regular private income (which includes private transfers), to match
# standard ABS definitions and Greenville 2013 (I think - Josh to confirm).
# Real values (2016-17 dollars), summed (positive minus negative) and equivalised.
dat <- dat %>%
  mutate(
    hifpiip_rl   = hifpiip * deflator,
    hifpiin_rl   = hifpiin * deflator,
    hh_priv_inc  = hifpiip_rl - hifpiin_rl,
    eq_priv_inc  = hh_priv_inc / OECD_mod,
    eq_priv_incp = hifpiip_rl / OECD_mod,
    eq_priv_incn = hifpiin_rl / OECD_mod
  )

# LABOUR
# Inflation-adjusted labour income (there is no negative component), then equivalised.
dat <- dat %>%
  mutate(
    hh_lab_inc = hiwsfei * deflator,
    eq_lab_inc = hh_lab_inc / OECD_mod
  )

# Break disposable income down into its four constituent parts
# (labour, other private, government transfers, tax).
dat <- dat %>%
  mutate(
    eq_other_priv_inc = eq_priv_inc - eq_lab_inc,
    eq_govt_trans     = eq_gross_inc - eq_priv_inc,
    eq_tax            = eq_disp_inc - eq_gross_inc,  # disposable minus gross
    # CAPITAL
    eq_cap_inc        = eq_priv_inc - eq_lab_inc,    # same definition as eq_other_priv_inc
    eq_trans_inc      = eq_gross_inc - eq_priv_inc   # same as eq_govt_trans
  )

# Additional income variables
dat <- dat %>%
  mutate(
    eq_bus_inc       = ((hibifip - hibifin) * deflator) / OECD_mod,
    eq_invest_inc    = ((hifinip - hifinin) * deflator) / OECD_mod,
    eq_priv_pensions = (hifppi * deflator) / OECD_mod,
    eq_priv_trans    = (hifpti * deflator) / OECD_mod
  )

###############################################################
# WEALTH VARIABLES
################################################################
# WEALTH YEARS - the 4 years in which the wealth module is included in HILDA
wealth_years <- c(2002, 2006, 2010, 2014)

# Overdue household bills - set to 0 for 2002 (no overdue household bill data
# were collected in that year).
dat <- dat %>%
  mutate(eq_overdue_bills = ifelse(year == 2002, 0, (hwobdti * deflator_lag) / OECD_mod))

# NET WORTH
# Adjust for inflation, equivalise, and subtract overdue bills. The result is
# split into a non-negative positive part (eq_wealthp) and a non-negative
# negative part (eq_wealthn); at most one of the two is non-zero.
dat <- dat %>%
  mutate(
    # positive net worth less overdue bills - may be below zero at this point
    eq_wealthp = (hwnwip * deflator_lag) / OECD_mod - eq_overdue_bills,
    # Negative side. Where the positive side is exhausted (<= 0), the shortfall
    # left after netting overdue bills against positive net worth is added to
    # negative net worth. Subtracting the (non-positive) eq_wealthp does this,
    # and it keeps eq_wealthp - eq_wealthn equal to positive minus negative
    # minus overdue bills even when positive net worth is smaller than the bills.
    eq_wealthn = ifelse(eq_wealthp <= 0,
                        (hwnwin * deflator_lag) / OECD_mod - eq_wealthp,
                        0),
    # put eq_wealthp back to 0 for those whose net wealth is negative
    eq_wealthp = ifelse(eq_wealthp < 0, 0, eq_wealthp),
    # summed household wealth (positive minus negative)
    eq_wealth  = eq_wealthp - eq_wealthn,
    # UNequivalised wealth
    uneq_wealth = eq_wealth * OECD_mod
  )

# Equivalised REAL wealth components
dat <- dat %>%
  mutate(
    # various investments, and their total
    eq_equity_and_cash = ((hwcaini + hweqini) * deflator_lag) / OECD_mod, # equity investments plus cash investments (not cash holdings)
    eq_trust_funds     = (hwtrusi * deflator_lag) / OECD_mod,
    eq_investments     = eq_equity_and_cash + eq_trust_funds,
    # home: asset, debt, equity
    eq_home            = (hwhmvai * deflator_lag) / OECD_mod,
    eq_home_debt       = (hwhmdti * deflator_lag) / OECD_mod,
    eq_home_equity     = eq_home - eq_home_debt,
    # other property: asset, debt, equity
    eq_other_property        = (hwopvai * deflator_lag) / OECD_mod,
    eq_other_property_debt   = (hwopdti * deflator_lag) / OECD_mod,
    eq_other_property_equity = eq_other_property - eq_other_property_debt,
    # business equity = business assets minus business debt
    eq_business_equity = ((hwbusvi - hwbusdi) * deflator_lag) / OECD_mod,
    # various assets
    eq_bank            = (hwtbani * deflator_lag) / OECD_mod,
    eq_super           = (hwsupei * deflator_lag) / OECD_mod,
    eq_life_insurance  = (hwinsui * deflator_lag) / OECD_mod,
    eq_collect         = (hwcolli * deflator_lag) / OECD_mod,
    eq_vehicles        = (hwvechi * deflator_lag) / OECD_mod,
    # various debts - stored as NEGATIVE amounts
    eq_credit_card         = -(hwccdti * deflator_lag) / OECD_mod,
    eq_HECS                = -(hwhecdi * deflator_lag) / OECD_mod,
    eq_other_personal_debt = -(hwothdi * deflator_lag) / OECD_mod
  )

# 7 WEALTH categories:
#   eq_home_equity, eq_super, eq_other_property_equity, eq_business_equity,
#   eq_vehicles, plus the two built here.
dat <- dat %>%
  mutate(
    eq_financial_wealth = eq_investments + eq_bank + eq_life_insurance + eq_other_personal_debt,
    eq_personal_wealth  = eq_collect + eq_credit_card + eq_HECS,
    # Other wealth: everything except super and home equity
    eq_other_wealth     = eq_wealth - eq_super - eq_home_equity
  )

# Real, UNequivalised wealth variables
dat <- dat %>%
  mutate(
    hh_home_equity = eq_home_equity * OECD_mod,
    hh_super       = eq_super * OECD_mod,
    hh_wealth      = eq_wealth * OECD_mod
  )

# Total debt as a positive amount (business debt is excluded - it is netted
# inside eq_business_equity). eq_HECS, eq_credit_card and
# eq_other_personal_debt are stored as negatives above, so they are subtracted
# here; adding them would offset them against the (positive) property debts.
dat <- dat %>%
  mutate(eq_debt = eq_home_debt + eq_other_property_debt
                   - eq_HECS - eq_credit_card - eq_other_personal_debt)

# Liquid assets for the Headey poverty test; super only counts when
# hhold (age of the oldest person in the house) is 65 or over.
dat <- dat %>%
  mutate(eq_liquid_assets = eq_equity_and_cash + eq_bank + ifelse(hhold >= 65, eq_super, 0))

###################################################################################
# DEMOGRAPHIC CATEGORIES
######################
# Dependent children
# Total number of dependent children, summed over the age bands.
dat <- dat %>%
  mutate(depend_child = hhd0_4 + hhd5_9 + hhd1014 + hhd1524)

# One row per household-year holding the largest depend_child value recorded
# for that household (sorted descending, first row kept).
dat_depend_child <- dat %>%
  select(year, hh_id, depend_child) %>%
  arrange(year, hh_id, desc(depend_child)) %>%
  distinct(year, hh_id, .keep_all = TRUE) %>%  # .keep_all = TRUE keeps the depend_child column
  rename(hh_depend_child = depend_child)

# Add the household-level count (hh_depend_child) to the main data frame.
dat <- left_join(dat, dat_depend_child, by = c("year", "hh_id"))
rm(dat_depend_child)

################################
# Check whether everyone in the house is 65 or over: sort by age ascending and
# keep the first row per household-year, i.e. the youngest member; if that
# person is 65+, so is everyone else.
dat_all_65_plus <- dat %>%
  select(year, hh_id, xwaveid, age) %>%
  arrange(year, hh_id, age) %>%
  distinct(year, hh_id, .keep_all = TRUE) %>%
  mutate(all_65_plus = ifelse(age >= 65, yes = 1, no = 0)) %>%
  select(year, hh_id, all_65_plus)

# add to main dataframe variable for whether all members are 65 plus.
# Attach the household-level all_65_plus flag (dat_all_65_plus is built
# immediately before this point), then drop the helper table.
dat <- left_join(dat, dat_all_65_plus, by = c("year", "hh_id"))
rm(dat_all_65_plus)

################################
# Number of employed people in the household.
# NOTE: quite a few adults in a household are non-respondents; they are
# treated as not working.
dat <- dat %>%
  mutate(
    lab_status     = esbrd,
    lab_status_int = as.integer(lab_status) - 10,  # as.integer() reads codes 1, 2, 3 as 11, 12, 13
    employed       = ifelse(lab_status_int == 1, yes = 1, no = 0)
  )

# One row per household-year with the count of employed members.
# na.rm = TRUE keeps a member with a missing status from turning the whole
# household count into NA (consistent with "treated as not working" above).
# ungroup() drops the `year` grouping that summarise() would otherwise leave
# on the result.
dat_employed <- dat %>%
  select(year, hh_id, employed) %>%
  group_by(year, hh_id) %>%
  summarise(hh_employed = sum(employed, na.rm = TRUE)) %>%
  ungroup()

dat <- left_join(dat, dat_employed, by = c("year", "hh_id"))
rm(dat_employed)

#################################
# Does anyone in the household receive benefits?
# hicapi: imputed current weekly Australian public transfers for household.
dat <- dat %>%
  mutate(any_benefit = ifelse(hicapi > 0, yes = 1, no = 0))

# One row per household-year: sort so that a row with any_benefit == 1 (if
# there is one) comes first, then keep only that first row. The distinct()
# step is required - without it this table has one row per person and the
# join below would repeat every person row once per household member.
dat_benefit <- dat %>%
  select(year, hh_id, any_benefit) %>%
  arrange(year, hh_id, desc(any_benefit)) %>%
  distinct(year, hh_id, .keep_all = TRUE) %>%  # only keep the top (if anyone has benefits, this one will)
  rename(hh_benefit = any_benefit)

dat <- left_join(dat, dat_benefit, by = c("year", "hh_id"))
rm(dat_benefit)

###################################
# put all together to categorize each person according to their family type.
# eq_lab_inc == 0 is part of the "not employed" criteria to tighten the
# definition and account for non-responding adults.
# Conditions are evaluated in order; the first match wins. Rows that match
# nothing (including rows where an input is NA) get the placeholder -99.
dat <- dat %>%
  mutate(household_type = case_when(
    hh_depend_child > 0 & (hh_employed == 1 | (hh_employed == 0 & eq_lab_inc > 0)) ~ 1,                 # family employed 1
    hh_depend_child > 0 & hh_employed >= 2 ~ 2,                                                         # family employed 2 or more
    hh_depend_child > 0 & hh_employed == 0 & eq_lab_inc == 0 ~ 3,                                       # family not employed
    hh_depend_child == 0 & all_65_plus > 0 & hh_employed == 0 & eq_lab_inc == 0 & any_benefit == 0 ~ 4, # retiree self-funded
    hh_depend_child == 0 & all_65_plus > 0 & hh_employed == 0 & eq_lab_inc == 0 & any_benefit > 0 ~ 5,  # pensioners
    hh_depend_child == 0 & (hh_employed >= 1 | (hh_employed == 0 & eq_lab_inc > 0)) ~ 6,                # working age employed
    hh_depend_child == 0 & all_65_plus == 0 & hh_employed == 0 & eq_lab_inc == 0 ~ 7,                   # working age not employed
    TRUE ~ -99
  ))

# Convert the codes to a labelled factor. `levels` is given explicitly so each
# label is tied to its code: the -99 placeholder becomes NA, and a category
# that happens to be absent from the data no longer shifts or breaks the
# label assignment.
dat <- dat %>%
  mutate(household_type = factor(
    household_type,
    levels = 1:7,
    labels = c("family_employed_1", "family_employed_2", "family_unemployed",
               "retiree_other", "retiree_pension",
               "working_age_employed", "working_age_unemployed")
  ))

# Household types as strings. Taken from the factor so that both variables use
# exactly the same names - the names that household_type_labels and
# household_type_labels_poverty are keyed on. Unclassified rows are NA.
dat <- dat %>%
  mutate(household_type_string = as.character(household_type))

# Coarser grouping for poverty analysis: the two employed-family types are
# merged, as are the two retiree types; everything else passes through.
dat <- dat %>%
  mutate(household_type_poverty = case_when(
    household_type_string %in% c("family_employed_1", "family_employed_2") ~ "family_employed",
    household_type_string %in% c("retiree_pension", "retiree_other") ~ "retiree",
    TRUE ~ household_type_string
  ))

##############################################################
# AGE RANGES
#################################################################
# Age group as a labelled factor.
dat <- dat %>%
  mutate(age_group = case_when(
    hgage < 15 ~ 1,                # under 15
    hgage >= 15 & hgage < 25 ~ 2,  # 15 to 24
    hgage >= 25 & hgage < 35 ~ 3,  # 25 to 34
    hgage >= 35 & hgage < 45 ~ 4,  # 35 to 44
    hgage >= 45 & hgage < 55 ~ 5,  # 45 to 54
    hgage >= 55 & hgage < 65 ~ 6,  # 55 to 64
    hgage >= 65 ~ 7,               # 65 plus
    TRUE ~ -99
  ))

# Explicit `levels` for the same reason as household_type: a missing age
# (placeholder -99) becomes NA instead of adding an eighth, unlabelled level.
dat <- dat %>%
  mutate(age_group = factor(
    age_group,
    levels = 1:7,
    labels = c("under 15", "15 to 24", "25 to 34", "35 to 44",
               "45 to 54", "55 to 64", "65 plus")
  ))

########################################################################################################################
# 0/1 indicator column for each age group (names match `age_groups`).
dat <- dat %>%
  mutate(
    "under15s" = ifelse(hgage < 15, 1, 0),
    "15to24"   = ifelse(hgage >= 15 & hgage < 25, 1, 0),
    "25to34"   = ifelse(hgage >= 25 & hgage < 35, 1, 0),
    "35to44"   = ifelse(hgage >= 35 & hgage < 45, 1, 0),
    "45to54"   = ifelse(hgage >= 45 & hgage < 55, 1, 0),
    "55to64"   = ifelse(hgage >= 55 & hgage < 65, 1, 0),
    "65plus"   = ifelse(hgage >= 65, 1, 0)
  )

# Age group as a display string, built from the indicator columns.
dat <- dat %>%
  mutate(age_group_string = case_when(
    hgage < 15    ~ "Under 15",
    `15to24` == 1 ~ "15 to 24",
    `25to34` == 1 ~ "25 to 34",
    `35to44` == 1 ~ "35 to 44",
    `45to54` == 1 ~ "45 to 54",
    `55to64` == 1 ~ "55 to 64",
    `65plus` == 1 ~ "65+"
  ))

########################################################
# AGE COHORT VARIABLES
# Birth decade; birth years outside 1910-1999 (or missing) give NA.
dat <- dat %>%
  mutate("birth_decade" = case_when(
    birth_year > 1909 & birth_year < 1920 ~ "1910s",
    birth_year > 1919 & birth_year < 1930 ~ "1920s",
    birth_year > 1929 & birth_year < 1940 ~ "1930s",
    birth_year > 1939 & birth_year < 1950 ~ "1940s",
    birth_year > 1949 & birth_year < 1960 ~ "1950s",
    birth_year > 1959 & birth_year < 1970 ~ "1960s",
    birth_year > 1969 & birth_year < 1980 ~ "1970s",
    birth_year > 1979 & birth_year < 1990 ~ "1980s",
    birth_year > 1989 & birth_year < 2000 ~ "1990s",
    TRUE ~ NA_character_
  ))

################################################################
# CONSUMPTION VARIABLES
###############################################################
# CONSUMPTION YEARS - years that have good consumption data
cons_years <- as.numeric(c(2006:2016))

#############
# RENT
dat <- dat %>%
  mutate(
    hh_act_rent = 12 * hsrnti,    # annualised rent
    hh_exp_rent = 52 * hsfa,      # annualised expected rent for those living rent free
    hh_imp_rent = 0.05 * hsvalui  # imputed rent, defined as 5% of house value, for those who own / are paying off their home
  )

# Put it all together in one household rent variable. (I have checked that
# only rent-buy people - see variable hstenr, hstenur for wave 1 - are
# positive in more than one of these three measures.)
# NOTE(review): rows that fall through to the -1 placeholder carry it into
# eq_rent and eq_cons below - confirm they are excluded before those are used.
dat <- dat %>%
  mutate(hh_rent = case_when(
    # renters, excluding those in rent to buy schemes.
    # I HAVE NOT added household repairs and maintenance to rent paid
    hh_act_rent >= 0 & hh_imp_rent <= 0 ~ hh_act_rent,
    # home owners, those paying off a mortgage, and those in rent to buy
    # schemes (imputed rent is used in preference to actual rent, which may
    # be contributing to capital)
    hh_imp_rent > 0 ~ hh_imp_rent,
    # those living rent free: expected rent
    hh_exp_rent >= 0 & hh_imp_rent <= 0 ~ hh_exp_rent,
    # rent is set to -1 for those with missing values - 102 people in 2016
    TRUE ~ -1
  ))

######################################################################################
# Build up consumption variables for waves 6-16 (consumption data are patchy
# for earlier years) - using all imputed variables.
# make years integer - possibly needed for case_when's to work
# dat <- mutate(dat, year=as.integer(year))

# 'food' type expenditure
dat <- dat %>%
  mutate(hh_food = hxygrci    # groceries
                   + hxyalci  # alcohol
                   + hxycigi  # cigarettes and tobacco
                   + hxymli)  # meals eaten out

# non-durable transport expenditure
dat <- dat %>%
  mutate(hh_transport = hxypbti    # public transport and taxis
                        + hxymvfi  # motor vehicle fuel
                        + hxymvri) # motor vehicle repairs and maintenance

# clothing expenditure (0 before 2006)
dat <- dat %>%
  mutate(hh_clothing = case_when(
    year >= 2006 ~ hxymcfi + hxywcfi + hxyccfi,  # men's + women's + children's clothing and footwear
    TRUE ~ 0L
  ))

# health expenditure - private health insurance included, following ABS (0 before 2006)
dat <- dat %>%
  mutate(hh_health = case_when(
    year >= 2006 ~ hxyhlpi     # fees paid to health practitioners
                   + hxyphmi   # medicines, prescriptions and pharmaceuticals
                   + hxyphii,  # private health insurance
    TRUE ~ 0L
  ))

# utilities expenditure (0 before 2006)
dat <- dat %>%
  mutate(hh_utilities = case_when(
    year >= 2006 ~ hxytlii     # telephone rent and calls, internet charges
                   + hxyutli,  # electricity, gas bills and other heating fuel
    TRUE ~ 0L
  ))

###############################
# Equivalised REAL consumption variables - the main consumption components
dat <- dat %>%
  mutate(
    eq_rent            = (hh_rent * deflator_lag) / OECD_mod,
    eq_food            = (hh_food * deflator_lag) / OECD_mod,
    eq_transport       = (hh_transport * deflator_lag) / OECD_mod,
    eq_clothing        = (hh_clothing * deflator_lag) / OECD_mod,
    eq_health          = (hh_health * deflator_lag) / OECD_mod,
    eq_other_insurance = (hxyoii * deflator_lag) / OECD_mod,
    eq_utilities       = (hh_utilities * deflator_lag) / OECD_mod,
    eq_education       = (hxyedci * deflator_lag) / OECD_mod,
    eq_childcare       = (ccactci * deflator_lag) / OECD_mod
  )

#############################
# Headline consumption figure - home repairs (hxyhmri) are treated as
# investment rather than consumption (rent or gross imputed rent is counted
# within consumption instead).
dat <- dat %>%
  mutate(eq_cons = eq_rent
                   + eq_food
                   + eq_transport
                   + eq_clothing
                   + eq_health
                   + eq_other_insurance  # other insurance is included because, according to HES, most of it is vehicle insurance and other items (not home and contents, which we have excluded in HES)
                   + eq_utilities
                   + eq_education
                   + eq_childcare)