# Productivity Commission 2018 analysis for the Rising Inequality? Commission Research Paper # Cross-sectional HILDA analysis # Need to run the top part of code, to get income, wealth, consumption deciles, etc. (up to line ~160) then can run bit by bit. ############################################################# # INCOME DECILES ############################################################# # weighted cut points cuts_wtd <- dat %>% group_by(year) %>% do(data.frame(t(wtd.quantile(.$eq_disp_inc, weights=.$hh_wt, probs=seq(from = 0, to = 1, by = 0.1) ) ) ) ) %>% # the t is a transpose ungroup() %>% rename(cut0=X..0., cut10=X.10., cut20=X.20., cut30=X.30., cut40=X.40., cut50=X.50., cut60=X.60., cut70=X.70., cut80=X.80., cut90=X.90., cut100=X100.) # add in 50% of income variable cuts_wtd <- mutate(cuts_wtd, inc_poverty_line = 0.5*cut50) # join weighted cut points to the dataframe dat <- left_join(dat, cuts_wtd, by="year") # assign each observation to a decile basked depending on relevant weighted cut points dat <- dat %>% mutate(inc_dec = case_when( eq_disp_inc <= cut10 ~ 1, eq_disp_inc > cut10 & eq_disp_inc <= cut20 ~ 2, eq_disp_inc > cut20 & eq_disp_inc <= cut30 ~ 3, eq_disp_inc > cut30 & eq_disp_inc <= cut40 ~ 4, eq_disp_inc > cut40 & eq_disp_inc <= cut50 ~ 5, eq_disp_inc > cut50 & eq_disp_inc <= cut60 ~ 6, eq_disp_inc > cut60 & eq_disp_inc <= cut70 ~ 7, eq_disp_inc > cut70 & eq_disp_inc <= cut80 ~ 8, eq_disp_inc > cut80 & eq_disp_inc <= cut90 ~ 9, eq_disp_inc > cut90 ~ 10, TRUE ~ -99 ) ) # write to csv for use externally # HILDA_cuts_wtd <- cuts_wtd %>% # select(-inc_poverty_line) # write.csv(x=HILDA_cuts_wtd, file= here("shared_dataframes", "HILDA_cuts_wtd.csv"), row.names=FALSE) # rm(HILDA_cuts_wtd) # assign each observation a 1 if equivalised household disposable income less than half of median, 0 otherwise dat <- dat %>% mutate(inc_poverty = case_when( eq_disp_inc < inc_poverty_line ~ 1, eq_disp_inc >= inc_poverty_line ~ 0, TRUE ~ -99) ) # Income ratios cuts_wtd <- mutate(cuts_wtd, ninety10 = cut90/cut10) cuts_wtd <- mutate(cuts_wtd, ninety50 = cut90/cut50) cuts_wtd <- mutate(cuts_wtd, fifty10 = cut50/cut10) ################################################################# # WEALTH DECILES ############################################################## # weighted cut points cuts_wealth <- dat %>% group_by(year) %>% filter(year %in% wealth_years) %>% do(data.frame(t(wtd.quantile(.$eq_wealth, weights=.$hh_wt, probs=seq(from = 0, to = 1, by = 0.1) ) ) ) ) %>% ungroup() %>% rename(wealth_cut0=X..0., wealth_cut10=X.10., wealth_cut20=X.20., wealth_cut30=X.30., wealth_cut40=X.40., wealth_cut50=X.50., wealth_cut60=X.60., wealth_cut70=X.70., wealth_cut80=X.80., wealth_cut90=X.90., wealth_cut100=X100.) # join weighted cut points to the dataframe dat <- left_join(dat, cuts_wealth, by="year") # assign each observation to a decile basked depending on relevant weighted cut points dat <- dat %>% mutate(wealth_dec = case_when( eq_wealth <= wealth_cut10 ~ 1, eq_wealth > wealth_cut10 & eq_wealth <= wealth_cut20 ~ 2, eq_wealth > wealth_cut20 & eq_wealth <= wealth_cut30 ~ 3, eq_wealth > wealth_cut30 & eq_wealth <= wealth_cut40 ~ 4, eq_wealth > wealth_cut40 & eq_wealth <= wealth_cut50 ~ 5, eq_wealth > wealth_cut50 & eq_wealth <= wealth_cut60 ~ 6, eq_wealth > wealth_cut60 & eq_wealth <= wealth_cut70 ~ 7, eq_wealth > wealth_cut70 & eq_wealth <= wealth_cut80 ~ 8, eq_wealth > wealth_cut80 & eq_wealth <= wealth_cut90 ~ 9, eq_wealth > wealth_cut90 ~ 10, TRUE ~ -99 ) ) # Wealth ratios cuts_wealth <- mutate(cuts_wealth, ninety10 = wealth_cut90/wealth_cut10) cuts_wealth <- mutate(cuts_wealth, ninety50 = wealth_cut90/wealth_cut50) cuts_wealth <- mutate(cuts_wealth, fifty10 = wealth_cut50/wealth_cut10) ############################################################################# # CONSUMPTION DECILES ########################################################################### # CREATE CONSUMPTION DATAFRAME # drop those not appropriate for consumption - generate new consumption data frame (observations before 2006 and those with negative (missing) rent) dat_cons <- dat %>% filter(year >= 2006) %>% subset(eq_rent >= 0) ## # CONSUMPTION DECILES # weighted cut points cuts_cons <- dat_cons %>% group_by(year) %>% do(data.frame(t(wtd.quantile(.$eq_cons, weights=.$hh_wt, probs=seq(from = 0, to = 1, by = 0.1) ) ) ) ) %>% ungroup() %>% rename(cons_cut0=X..0., cons_cut10=X.10., cons_cut20=X.20., cons_cut30=X.30., cons_cut40=X.40., cons_cut50=X.50., cons_cut60=X.60., cons_cut70=X.70., cons_cut80=X.80., cons_cut90=X.90., cons_cut100=X100.) # add in 50% of income variable cuts_cons <- mutate(cuts_cons, cons_poverty_line = 0.5*cons_cut50) # join weighted cut points to the dataframe dat_cons <- left_join(dat_cons, cuts_cons, by="year") # assign each observation to a decile basked depending on relevant weighted cut points dat_cons <- dat_cons %>% mutate(cons_dec = case_when( eq_cons <= cons_cut10 ~ 1, eq_cons > cons_cut10 & eq_cons <= cons_cut20 ~ 2, eq_cons > cons_cut20 & eq_cons <= cons_cut30 ~ 3, eq_cons > cons_cut30 & eq_cons <= cons_cut40 ~ 4, eq_cons > cons_cut40 & eq_cons <= cons_cut50 ~ 5, eq_cons > cons_cut50 & eq_cons <= cons_cut60 ~ 6, eq_cons > cons_cut60 & eq_cons <= cons_cut70 ~ 7, eq_cons > cons_cut70 & eq_cons <= cons_cut80 ~ 8, eq_cons > cons_cut80 & eq_cons <= cons_cut90 ~ 9, eq_cons > cons_cut90 ~ 10, TRUE ~ -99 ) ) # join consumption deciles to main frame cons_join <- dat_cons %>% select(xwaveid, year, cons_dec) dat <- left_join(dat, cons_join, by=c("xwaveid","year")) rm(cons_join) ############################ ######################################################################################################## # BEGINNING OF OPTIONAL ANALYSIS - CAN BE DONE BIT BY BIT ###################################################################################################### ####################################################################### # CHAPTER 3 - INCOME ####################################################################### ################# # figure 3.1- median and mean income for combined chart - HILDA combine with HES (figure 3.1) HILDA_income <- dat %>% select(year, eq_disp_inc, hh_wt) %>% group_by(year) %>% summarise(mean = wtd.mean(eq_disp_inc, weights=hh_wt), median = wtd.quantile(eq_disp_inc, weights=hh_wt, probs=0.5) ) %>% mutate(source="HILDA") # saveRDS(HILDA_income, file= here("shared_dataframes", "HILDA_income.rds") ) ################ # figure 3.2 - GINI COEFFICIENTS (to feed into HES/SIH code) (figure 3.2) ginis <- dat %>% select(year, hh_wt, eq_disp_inc, eq_gross_inc, eq_priv_inc, eq_lab_inc, eq_wealth, eq_cons) %>% gather(key = "variable", value = "value", c(-year, -hh_wt)) %>% mutate(value = ifelse(value >= 0, value, 0)) %>% # treat negative numbers as zeroes group_by(year, variable) %>% summarise(gini = gini(value, weights = hh_wt)) # saveRDS(ginis, file= here("shared_dataframes", "HILDA_ginis.rds") ) ############# # INCOME quantile RATIOS - for HES check (figure 3.3) cuts_wtd %>% select(year, ninety10, ninety50, fifty10) %>% gather(ratio, value, -year) %>% ggplot(aes(x=year, y=value, col=ratio) ) + geom_line(size=2) ######## # disposable income ginis using two equivalisation methods (OECD_mod and square root) - for HES check (feeds into figure 3.4) ginis <- dat %>% select(year, hh_wt, eq_disp_inc, sqrt_eq_disp_inc) %>% gather(key = "variable", value = "value", c(-year, -hh_wt)) %>% mutate(value = ifelse(value >= 0, value, 0)) %>% # treat all negative values as 0 group_by(year, variable) %>% summarise(gini = gini(value, weights = hh_wt)) %>% spread(key=variable, value=gini, drop=FALSE) # figure 3.5 - HILDA not available for time period ############## # figure 3.6 - Income growth rates vary by period - for HES check (figure 3.6) dat %>% select(year,hh_wt, inc_dec, eq_disp_inc) %>% group_by(year, inc_dec) %>% summarise(mean_value = (wtd.mean(eq_disp_inc, weight = hh_wt))) %>% ungroup() %>% arrange(inc_dec,year) %>% filter(year %in% c(2004, 2010, 2016)) %>% mutate(percent=(((mean_value/lag(mean_value))^(1/(year-lag(year)))) - 1)*100) %>% filter(year!=2004) %>% ggplot(aes(x = factor(year), y = percent, fill = inc_dec, group = inc_dec))+ geom_col(position="dodge")+ scale_y_continuous(labels = comma, expand=c(0.01,0.01))+ scale_x_discrete(expand = c(0.01, 0.01))+ labs(x = "HES period", y = "Per cent", fill = "Income decile:")+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text(), axis.line.x = element_blank())+ geom_hline(yintercept = 0, colour = grey) # figure 3.7 - HILDA not available for time period ################# # TOP INCOME CHARTS (figure 3.8) #Top 1 per cent column dat <- dat %>% group_by(year) %>% mutate(top_one_per_cent = ifelse(eq_disp_inc > wtd.quantile(eq_disp_inc, weights=hh_wt, probs=0.99), 1, 0)) %>% mutate(ninety_nine_per_cent = abs(top_one_per_cent-1)) #Top 1 per cent mean income growth - 2.3% p.a. 2001-2016 dat %>% group_by(year) %>% summarise(mean_value = wtd.mean(eq_disp_inc, weights=hh_wt*top_one_per_cent)) %>% mutate(one_per_cent_growth = (((mean_value/lag(mean_value,15))^(1/(year-lag(year,15)))) - 1)*100) # income growth for whole population - 1.9% dat %>% group_by(year) %>% summarise(mean_value = wtd.mean(eq_disp_inc, weights=hh_wt)) %>% mutate(one_per_cent_growth = (((mean_value/lag(mean_value,15))^(1/(year-lag(year,15)))) - 1)*100) #Top 1 per cent income share - 4.8% in 2016 dat %>% select(year, hh_wt, eq_disp_inc, top_one_per_cent) %>% group_by(year) %>% summarise(share = sum(eq_disp_inc*hh_wt*top_one_per_cent)/sum(eq_disp_inc*hh_wt)*100) #Gini comparison with and without top 1 per cent - no negative values dat %>% select(year, hh_wt, eq_disp_incp) %>% group_by(year) %>% mutate(ninety_nine_per_cent_indicator = ifelse(eq_disp_incp < wtd.quantile(eq_disp_incp, weights=hh_wt, probs=0.99), 1, 0)) %>% summarise(total_gini = gini(eq_disp_incp, weights = hh_wt), ninety_nine_per_cent_gini = gini(eq_disp_incp, weights = hh_wt*ninety_nine_per_cent_indicator), per_cent_difference = (total_gini - ninety_nine_per_cent_gini)/total_gini*100) ########### # Figure 3.9 - Income mostly comes from labour for most households (Figure 3.9) dat %>% select(year,hh_wt, inc_dec, eq_lab_inc, eq_cap_inc, eq_trans_inc, eq_tax, eq_gross_inc) %>% filter(year==2016) %>% gather(inc_type,value,c(-year,-inc_dec, -hh_wt, -eq_gross_inc)) %>% group_by(year, inc_type,inc_dec) %>% summarise(percent = 100*sum(value*hh_wt)/sum(eq_gross_inc*hh_wt)) %>% ggplot(aes(x = factor(inc_type), y = percent, fill = inc_dec, group = inc_dec))+ geom_col(position = "dodge")+ scale_x_discrete( expand = c(0.01, 0.01))+ scale_y_continuous(expand=c(0.0,0.0), limits = c(-40,120))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text(), axis.line.x = element_blank())+ labs(x="Income type", y = "Per cent", fill="Income decile:")+ geom_hline(yintercept = 0, colour = grey)+ #Add this to get a horizontal axis line at zero guides(fill=guide_legend(nrow=1, byrow=TRUE)) # Figure 3.10 - HILDA doesn't cover this span of time ################### # Figure 3.11 - Taxes and transfers consistently reduce inequality dat %>% select(year, hh_wt, eq_disp_inc, eq_gross_inc, eq_priv_inc) %>% gather(key = "variable", value = "value", c(-year, -hh_wt)) %>% mutate(value = ifelse(value >= 0, value, 0)) %>% # treat negative numbers as zeroes group_by(year, variable) %>% summarise(gini = gini(value, weights = hh_wt)) %>% ggplot(aes(x=year, y=gini, col= variable, group=variable)) + geom_line() ################# # Figure 3.12 - Changes in labour and transfer income have driven overall trends in inequality. marginal effect on the gini. (Figure 3.12) ##Marginal effects of income components #Effect of income tax on disposable income marginal_disp_ginis <- dat %>% select(year, hh_wt, eq_disp_inc, eq_gross_inc) %>% mutate(eq_disp_inc_non_zero = ifelse(eq_disp_inc > 0, eq_disp_inc,0)) %>% mutate(eq_gross_inc_non_zero = ifelse(eq_gross_inc > 0, eq_gross_inc,0)) %>% group_by(year) %>% summarise(eq_disp_inc_gini = gini(eq_disp_inc_non_zero, weights = hh_wt), eq_gross_inc_gini = gini(eq_gross_inc_non_zero, weights = hh_wt), marginal_effect = eq_disp_inc_gini-eq_gross_inc_gini) #Effect of labour, capital and transfers on gross income marginal_gross_ginis <- dat %>% select(year, hh_wt, eq_gross_inc, eq_lab_inc, eq_cap_inc, eq_trans_inc) %>% mutate(gross_total = ifelse(eq_gross_inc > 0, eq_gross_inc,0), gross_less_lab = ifelse(eq_gross_inc - eq_lab_inc > 0, eq_gross_inc - eq_lab_inc,0), gross_less_cap = ifelse(eq_gross_inc - eq_cap_inc > 0, eq_gross_inc - eq_cap_inc,0), gross_less_trans = ifelse(eq_gross_inc - eq_trans_inc > 0, eq_gross_inc - eq_trans_inc,0)) %>% gather(key = "variable", value = "value", c(gross_less_lab, gross_less_cap, gross_less_trans)) %>% group_by(year, variable) %>% summarise(gross_gini = gini(gross_total, weights = hh_wt), gross_less_gini = gini(value, weights = hh_wt), marginal_effect = gross_gini-gross_less_gini) #Combining all marginal effects marginal_disp_ginis <- marginal_disp_ginis %>% mutate(variable = "disp_less_tax") marginal_ginis <- bind_rows(marginal_gross_ginis,marginal_disp_ginis) #figure 3.12a # emf(file = here("HES","Charts","marginal_gini_effects_tax.emf"), width = 2.95276, height = 2.3622) marginal_ginis %>% filter(variable=="disp_less_tax") %>% ggplot(aes(x=year,y=marginal_effect, col = variable)) + geom_line(size=1)+ geom_point(size=2)+ # scale_x_continuous(breaks = HES_years, labels = short_fin_year_labels, position = "top")+ scale_color_manual(values=PCcols)+ xlab("Year")+ ylab("Change in Gini coefficient")+ ylim(-0.1,0)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), strip.background = element_rect(fill = grey), strip.text = element_text(face = "bold"), panel.border = element_rect(colour = grey, fill = NA), legend.position = "none") # dev.off() #figure 3.12b # emf(file = here("HES","Charts","marginal_gini_effects_capital.emf"), width = 2.95276, height = 2.3622) marginal_ginis %>% filter(variable=="gross_less_cap") %>% ggplot(aes(x=year,y=marginal_effect, col = variable)) + geom_line(size=1)+ geom_point(size=2)+ # scale_x_continuous(breaks = HES_years, labels = short_fin_year_labels, position = "top")+ scale_color_manual(values=PCcols)+ xlab("Year")+ ylab("Change in Gini coefficient")+ ylim(-0.1,0)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), strip.background = element_rect(fill = grey), strip.text = element_text(face = "bold"), panel.border = element_rect(colour = grey, fill = NA), legend.position = "none") # dev.off() #figure 3.12c # emf(file = here("HES","Charts","marginal_gini_effects_labour.emf"), width = 2.95276, height = 2.3622) marginal_ginis %>% filter(variable=="gross_less_lab") %>% ggplot(aes(x=year,y=marginal_effect, col = variable)) + geom_line(size=1)+ geom_point(size=2)+ # scale_x_continuous(breaks = HES_years, labels = short_fin_year_labels, position = "top")+ scale_color_manual(values=PCcols)+ xlab("Year")+ ylab("Change in Gini coefficient")+ ylim(-0.35,-0.25)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), strip.background = element_rect(fill = grey), strip.text = element_text(face = "bold"), panel.border = element_rect(colour = grey, fill = NA), legend.position = "none") # dev.off() #figure 3.12d # emf(file = here("HES","Charts","marginal_gini_effects_transfer.emf"), width = 2.95276, height = 2.3622) marginal_ginis %>% filter(variable=="gross_less_trans") %>% ggplot(aes(x=year,y=marginal_effect, col = variable)) + geom_line(size=1)+ geom_point(size=2)+ scale_x_continuous(breaks = HES_years, position = "top")+ scale_color_manual(values=PCcols)+ xlab("Year")+ ylab("Change in Gini coefficient")+ ylim(-0.15,-0.05)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), strip.background = element_rect(fill = grey), strip.text = element_text(face = "bold"), panel.border = element_rect(colour = grey, fill = NA), legend.position = "none") # dev.off() # figure 3.13 - HILDA doesn't ahve the same welfare payment splits ######################################################################### # Section 3.3 - the demographics of the income distribution ########### # Figure 3.14 - Representation in income deciles varies by age - check HES (Figure 3.14) # Share of people of a given age in each income decile, 2015-16 dat %>% filter(year==2016) %>% group_by(inc_dec, age_group_string) %>% summarise(pop = sum(hhwt)) %>% ungroup() %>% group_by(age_group_string) %>% mutate(percent = pop/sum(pop)) %>% ggplot(aes(x=factor(age_group_string), y=percent*100, fill=inc_dec, group=inc_dec ) ) + # geom_hline(yintercept = 10)+ geom_col(position = "dodge")+ scale_x_discrete(expand = c(0.1, 0.1))+ scale_y_continuous(expand=c(0,0), limits = c(0,25))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text()) + labs(x = "Age group", y = "Per cent", fill = "Income decile")+ geom_hline(yintercept = 10, colour = grey, size = 0.5, linetype = "dashed")+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) ############# # Figure 3.15 - Young people have had the worst of recent weak growth - check HES (Figure 3.15) dat %>% select(year, hh_wt, eq_disp_inc, age_group) %>% filter(!is.na(age_group)) %>% group_by(year, age_group) %>% summarise(mean_value = (wtd.mean(eq_disp_inc, weight = hh_wt))) %>% ungroup() %>% arrange(age_group,year) %>% filter(year %in% c(2004,2010,2016)) %>% mutate(percent=(((mean_value/lag(mean_value))^(1/(year-lag(year)))) - 1)*100) %>% filter(year %in% c(2010,2016)) %>% arrange(year) %>% ggplot(aes(x = factor(year), y = percent, fill = age_group, group = age_group))+ geom_col(position="dodge")+ scale_y_continuous(labels = comma, expand=c(0.01,0.01))+ scale_x_discrete(breaks = HES_years, labels = HESP_period_labels, expand = c(0.01, 0.01))+ labs(x = "HES period", y = "Per cent", fill = "Age group:")+ scale_fill_manual(values = PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text(), axis.line.x = element_blank())+ geom_hline(yintercept = 0, colour = grey)+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) ############ #Mean disposable income by age and birth decade - as per HES (figure 3.16) birth_decade_data <- dat %>% group_by(year, birth_decade) %>% filter(!is.na(birth_decade)) %>% summarise(disp_inc = weighted.mean(eq_disp_inc, hh_wt), age = weighted.mean(age, hh_wt)) %>% # used average of the ages of cohort by birth decade filter(age <= 65) ggplot(birth_decade_data, aes(x = age, y = disp_inc, group = birth_decade, col = birth_decade))+ geom_point()+ geom_line()+ scale_y_continuous(expand=c(0,0), limits = c(0,70000), labels = comma)+ scale_x_continuous(expand = c(0.01, 0.01), breaks = c(seq(15,65,5)), limits = c(15,65))+ scale_color_manual(values=PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), axis.title.y = element_blank(), legend.position = "none", legend.title.align = 0.5, legend.title = element_text()) + labs(x = "Age", col = paste0("Decade",'\n', "of birth")) + geom_label_repel(aes(label = birth_decade), data = subset(birth_decade_data, year == 2016 | (birth_decade == "1940s" & year == 2010)), nudge_x = 0, nudge_y = 3100, na.rm = TRUE, label.size = NA) ############# # Figure 3.17 - group by household type and show proportion in each decile - INCOME (income dec) (Figure 3.17) dat %>% filter(year==2016) %>% group_by(household_type, inc_dec) %>% summarise(group_total = sum(hh_wt)) %>% ungroup() %>% group_by(household_type) %>% mutate(percent = group_total/sum(group_total)*100)%>% ggplot(aes(x = household_type, y=percent, fill=inc_dec, group=inc_dec)) + # geom_hline(yintercept = 10)+ geom_col(position = "dodge")+ scale_x_discrete(expand = c(0.1, 0.1), labels = household_type_labels_spaced)+ scale_y_continuous(expand=c(0,0), breaks = seq(0,60,10))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text()) + labs(x = "Household type", y = "Per cent", fill = "Income decile:")+ geom_hline(yintercept = 10, colour = grey, size = 0.5, linetype = "dashed")+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) ####################################################################### # Section 3.4 # Figure 3.18 - Average final equivalised consumption by income decile - can't do in HILDA as no in kind transfers, also no savings. ######### # figure 3.19 - income and consumption by income deciles (figure 3.19) - can't do in HILDA as no in kind transfers, also no savings. dat %>% select(year, hhwt, inc_dec, eq_disp_inc, eq_cons) %>% gather(variable, value, -year, -hhwt, -inc_dec) %>% # mutate(variable = factor(variable, levels = consumption_levels)) %>% filter(year==2016) %>% group_by(variable, inc_dec) %>% summarise(mean_value = (wtd.mean(value, weight = hhwt))) %>% ggplot(aes(x = factor(inc_dec), y = mean_value, fill = variable, group = variable))+ geom_col(position="dodge")+ scale_y_continuous(labels = comma, expand=c(0,0), limits = c(0,150000))+ scale_x_discrete(expand = c(0.01, 0.01), labels = inc_dec_labels)+ scale_fill_manual(values = PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none")+ facet_wrap(~variable, labeller = consumption_labels)+ theme(legend.position = "none", legend.title=element_blank(), strip.background = element_rect(fill = grey), strip.text = element_text(face = "bold"), panel.border = element_rect(colour = grey, fill = NA), axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text.x = element_text(angle = 90, hjust = 1), plot.margin=unit(c(1,1,1.5,1.2),"cm")) ############## # Figure 3.20 - Gini coefficients for equivalised disposable income, private consumption and final consumption (figrue 3.20) # no final consumption available for HILDA dat %>% select(year, hh_wt, eq_disp_inc, eq_cons) %>% mutate(eq_cons = ifelse(year<2006, 0.2, eq_cons)) %>% # fudge so that ggplot works with na's gather(key = "variable", value = "value", c(-year, -hh_wt)) %>% mutate(value = ifelse(value >= 0, value, 0)) %>% # treat negative numbers as zeroes group_by(year, variable) %>% summarise(gini = gini(value, weights = hh_wt)) %>% ggplot(aes(x=year, y=gini, col=variable )) + geom_point(size=1) + geom_line(size=0.5) + scale_x_continuous(labels = wealth_fin_year_labels, breaks=seq(2002, 2015, 2), name="Year" ) + scale_y_continuous(labels=comma, name="Gini coefficient", limits = c(0, 0.4)) + scale_color_manual(values=PCcols) # figure 3.21 - no in kind transfers in HILDA so no check on HES # Figure 3.22 - no in kind transfers in HILDA so can't do final consumption deciles. Disposable income deciles in figure 3.17 above. ####################################################################### # CHAPTER 4 - WEALTH ####################################################################### ####### # Figure 4.1 - WEALTH MEANS AND MEDIANS - JOINT CHART WITH SIH - 'average wealth has steadily increased'. (FIGURE 4.1) SIH_wealth <- readRDS(file= here("shared_dataframes", "SIH_wealth_mean_and_median.rds") ) SIH_wealth <- mutate(SIH_wealth, source="SIH", year = year-1) HILDA_wealth <- dat %>% select(year, eq_wealth, hh_wt) %>% filter(year %in% wealth_years) %>% group_by(year) %>% summarise(mean = wtd.mean(eq_wealth, weights=hh_wt), median = wtd.quantile(eq_wealth, weights=hh_wt, probs=0.5) ) %>% mutate(source="HILDA") # emf(file = here("HILDA", "charts", "wealth_means_medians.emf"), width = 5.5, height = 3.14961) bind_rows(SIH_wealth, HILDA_wealth) %>% rename(Average=mean, Median=median) %>% gather(key="measure", value="value", -year, -source) %>% ggplot(aes(x=year, y=value, col=source, shape=measure )) + geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = wealth_fin_year_labels, breaks=seq(2002, 2015, 2), name="Year") + scale_y_continuous(expand = c(0, 0), labels=comma, name="Dollars", limits=c(0,600000)) + scale_color_manual(values=PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none" ) # dev.off() # calculate percentage changes - annualised SIH_wealth %>% arrange(year) %>% mutate(perc_growth_mean = ((mean/lag(mean,5))^(1/(year-lag(year,5))) - 1)*100, perc_growth_median = ((median/lag(median,5))^(1/(year-lag(year,5))) - 1)*100) HILDA_wealth %>% arrange(year) %>% mutate(perc_growth_mean = ((mean/lag(mean,3))^(1/(year-lag(year,3))) - 1)*100, perc_growth_median = ((median/lag(median,3))^(1/(year-lag(year,3))) - 1)*100) ########### # Figure 4.2 - average wealth by wealth deciles check against HES (figure 4.2) dat %>% select(year, hh_wt, wealth_dec, eq_wealth) %>% filter(year %in% c(2002,2014)) %>% group_by(year, wealth_dec) %>% summarise(mean_value = wtd.mean(eq_wealth, hh_wt)) %>% ggplot(aes(x = factor(wealth_dec), y = mean_value, fill = factor(year), group = year))+ geom_col(position="dodge")+ scale_y_continuous(labels = comma, expand = c(0, 0))+ scale_x_discrete(expand = c(0.01, 0.01))+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"))+ scale_fill_manual(values = PCcols, labels = wealth_fin_year_labels) + labs(x = "Wealth decile", y = "Dollars", fill = "Year") ###### # explore fall in WEALTH in BOTTOM decile - absolute changes (home and other property on equity basis) - footnote comparing to HES # explore numbers for bottom decile (not including eq_life_insurance) dat %>% filter(year==2014, wealth_dec==1) %>% select(xwaveid, year, hh_wt, wealth_dec, eq_bank, eq_investments, eq_super, eq_home_equity, eq_other_property_equity, eq_business_equity, eq_collect, eq_vehicles, eq_other_personal_debt, eq_credit_card, eq_HECS ) %>% gather(asset_type, value,-xwaveid, -year, -hh_wt, -wealth_dec) %>% group_by(asset_type, wealth_dec) %>% summarise(group_total = wtd.mean(value, hh_wt) ) %>% View() # changes dat %>% filter(year %in% wealth_years) %>% filter(wealth_dec==1) %>% select(xwaveid, year, hh_wt, eq_bank, eq_equity_and_cash, eq_trust_funds, eq_super, eq_life_insurance, eq_home_equity, eq_business_equity, eq_collect, eq_vehicles, eq_credit_card, eq_HECS, eq_other_personal_debt, eq_other_property_equity, eq_overdue_bills) %>% # have excluded total wealth gather(asset_type, value,-xwaveid, -year, -hh_wt) %>% group_by(asset_type, year) %>% summarise(group_total = wtd.mean(value, hh_wt) ) %>% ungroup() %>% arrange(asset_type, year) %>% mutate(abs_change = (group_total - lag(group_total, 3) ) ) %>% filter(year==2014) %>% ggplot(aes(x=asset_type, y=abs_change))+ geom_col(position="dodge") + theme(axis.text.x = element_text(angle = 90)) + scale_y_continuous(labels=comma) + PCcols_gradient ########### # Figure 4.3 - percentage growth in wealth by wealth decile - matches broadly with HES results in wealth chapter. - annualised basis (figure 4.3) dat %>% select(eq_wealth, hh_wt, xwaveid, year, wealth_dec) %>% filter(year %in% c(2002, 2014) ) %>% group_by(year, wealth_dec) %>% summarise(avg_wealth = wtd.mean(eq_wealth, weights = hh_wt) ) %>% ungroup() %>% arrange(wealth_dec, year) %>% mutate(perc_growth = ((avg_wealth/lag(avg_wealth))^(1/(year-lag(year))) - 1)*100 ) %>% filter(year==2014) %>% ggplot(aes(x=wealth_dec, y=perc_growth, fill=as.factor(wealth_dec) ) ) + geom_col(position ="dodge")+ scale_y_continuous(name="Annual growth in wealth (per cent)") + scale_x_continuous(breaks=seq(1,10,1), name="Wealth decile")+ theme(legend.position = "bottom",legend.title=element_blank() ) ######## # consumption ginis - joint chart with HES for presentation on release HES_cons_gini <- readRDS(file= here("shared_dataframes", "HES_ginis.rds") ) HES_cons_gini <- HES_cons_gini %>% ungroup() %>% mutate(source="HES") %>% filter(variable == "eq_cons" | variable == "eq_cons_no_inkind" ) %>% filter(year>1990) HILDA_cons_gini <- dat %>% select(year, eq_cons, hh_wt) %>% filter(year >= 2006) %>% group_by(year) %>% summarise(gini = gini(eq_cons, hh_wt) ) %>% mutate(variable ="HILDA") # consumption ginis chart for presentation on dark background # emf(file = here("HILDA", "charts", "cons_ginis_presentation.emf"), width = 5.90551, height = 3.14961) bind_rows(HES_cons_gini, HILDA_cons_gini) %>% ggplot(aes(x=year, y=gini, col=variable )) + # geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = inc_fin_year_labels, breaks=seq(1994, 2016, 3), name="Year" ) + scale_y_continuous(expand = c(0, 0), labels=comma, name="Gini coefficient", limits = c(0.0, 0.4)) + PC.theme.line() + scale_color_manual(values=c(yellow, blue, PC_green))+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none", axis.text.x = element_text(colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white)) # dev.off() ####### # Figure 4.4. - WEALTH GINIS - JOINT CHART WITH SIH (figure 4.4) SIH_wealth_gini <- readRDS(file= here("shared_dataframes", "SIH_wealth_ginis.rds") ) names(SIH_wealth_gini)[1] <- "year" names(SIH_wealth_gini) [2] <- "gini" SIH_wealth_gini <- mutate(SIH_wealth_gini, source="SIH", year = year-1) # because years are out of sync in HES v HILDA SIH_wealth_gini <- filter(SIH_wealth_gini, year>2002, year!=2007) HILDA_wealth_gini <- dat %>% select(year, eq_wealth, hh_wt) %>% filter(year %in% wealth_years) %>% group_by(year) %>% summarise(gini = gini(eq_wealth, hh_wt) ) %>% mutate(source="HILDA") # income get all together HILDA_income_gini <- dat %>% select(year, eq_disp_inc, hh_wt) %>% group_by(year) %>% summarise(gini = gini(eq_disp_inc, hh_wt) ) %>% mutate(source="HILDA", variable = "eq_disp_inc") SIHHES_income_gini <- readRDS(file= here("shared_dataframes", "SIHHES_income_ginis.rds") ) %>% select(-c(eq_disp_incp, eq_yearly_disp_inc) ) SIHHES_HILDA_income_ginis <- bind_rows(SIHHES_income_gini, HILDA_income_gini) SIHHES_HILDA_income_ginis <- SIHHES_HILDA_income_ginis %>% ungroup() %>% mutate(year=year-1) # to match transformed wealth ginis ####################### need to be careful if using this for other stuff. # wealth get all together SIH_HILDA_wealth_ginis <- bind_rows(SIH_wealth_gini, HILDA_wealth_gini) SIH_HILDA_wealth_ginis <- SIH_HILDA_wealth_ginis %>% mutate(variable = "eq_wealth") # income and wealth all together SIHHES_HILDA_income_wealth_ginis <- bind_rows(SIHHES_HILDA_income_ginis, SIH_HILDA_wealth_ginis) # wealth ginis chart (SIH and HILDA) - figure 4.4 # emf(file = here("HILDA", "charts", "wealth_ginis.emf"), width = 5.90551, height = 3.14961) bind_rows(SIH_wealth_gini, HILDA_wealth_gini) %>% ggplot(aes(x=year, y=gini, col=source )) + geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = wealth_fin_year_labels, breaks=seq(2002, 2015, 2), name="Year" ) + scale_y_continuous(expand = c(0, 0), labels=comma, name="Gini coefficient", limits = c(0.5, 0.65)) + scale_color_manual(values=PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none") # dev.off() # same as above but for presentation on dark background, # emf(file = here("HILDA", "charts", "wealth_ginis_presentation_full.emf"), width = 5.90551, height = 3.14961) bind_rows(SIH_wealth_gini, HILDA_wealth_gini) %>% ggplot(aes(x=year, y=gini, col=source )) + # geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = wealth_fin_year_labels, breaks=seq(2002, 2015, 2), name="Year" ) + scale_y_continuous(expand = c(0, 0), labels=comma, name="Gini coefficient", limits = c(0.5, 0.65)) + scale_color_manual(values=PCcolsPresRev)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none", axis.text.x = element_text(colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white)) # dev.off() # same as above but for presentation on dark background, half size, no points # emf(file = here("HILDA", "charts", "wealth_ginis_presentation_half.emf"), width = 2.95, height = 3.14961) bind_rows(SIH_wealth_gini, HILDA_wealth_gini) %>% ggplot(aes(x=year, y=gini, col=source )) + # geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = short_wealth_fin_year_labels, breaks=c(1988, 1993, 1998, 2003, 2009, 2015), limits=c(1988, 2015), name="Year" ) + # breaks=seq(2002, 2015, 2), name="Year" ) + scale_y_continuous(expand = c(0, 0), labels=comma, name="Gini coefficient", limits = c(0.2, 0.7)) + scale_color_manual(values=PCcolsPresRev)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none", axis.text.x = element_text(colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white)) # dev.off() # same as above but for presentation on dark background, full size, no points # emf(file = here("HILDA", "charts", "wealth_ginis_presentation_ful.emf"), width = 2.95, height = 3.14961) bind_rows(SIH_wealth_gini, HILDA_wealth_gini) %>% ggplot(aes(x=year, y=gini, col=source )) + # geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = short_wealth_fin_year_labels, breaks=c(1988, 1993, 1998, 2003, 2009, 2015), limits=c(1988, 2015), name="Year" ) + # breaks=seq(2002, 2015, 2), name="Year" ) + scale_y_continuous(expand = c(0, 0), labels=comma, name="Gini coefficient", limits = c(0.2, 0.7)) + scale_color_manual(values=PCcolsPresRev)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none", axis.text.x = element_text(colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white)) # dev.off() # income and wealth, HES/SIH and HILDA - all combined for Press Club Presentation. # svg(file = here("HILDA", "charts", "SIHHES_HILDA_income_wealth_ginis.svg")) # emf(file = here("HILDA", "charts", "SIHHES_HILDA_income_wealth_ginis.emf"), width = 5.90551, height = 3.14961) # chart SIHHES_HILDA_income_wealth_ginis %>% ggplot(aes(x=year, y=gini, col=source, group=interaction(source, variable) ) ) + # geom_point(size=2) + geom_line(size=1.05) + scale_x_continuous(expand = c(0.03, 0.03), labels = wealth_fin_year_labels, breaks=seq(1988, 2015, 3), name="Year" ) + scale_y_continuous(expand = c(0, 0), labels=comma, name="Gini coefficient", limits = c(0, 0.7), breaks = seq(0,0.7,0.1)) + scale_color_manual(values=PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none") # dev.off() ######################################### # Figure 4.5 wealth chapter - percentage of wealth held by top 10% - checking SIH ####################################### dat %>% select(wealth_dec, hhwt, eq_wealth, year) %>% group_by(year, wealth_dec) %>% summarise(group_total = sum(eq_wealth*hhwt)) %>% ungroup() %>% group_by(year) %>% mutate(perc = 100*group_total/sum(group_total)) %>% arrange(wealth_dec) %>% mutate(culm = cumsum(perc) ) %>% mutate(perc = ifelse(wealth_dec!=10, culm, perc) ) %>% filter(wealth_dec %in% c(5,10)) %>% ggplot(aes(x=year, y=perc, col=as.factor(wealth_dec) ) ) + geom_line(size = 1.05) + geom_point(size = 2) + scale_x_continuous(expand=c(0.03, 0.03), labels = wealth_fin_year_labels, breaks=wealth_years, name="Year") + scale_y_continuous(expand = c(0,0), labels=comma, limits=c(0,50), breaks=seq(0,50, 5),name="Per cent") + scale_color_manual(values=PCcols)+ theme(legend.position = "none", plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm")) ############## # Figure 4.6 - OECD means and medians 2017 (USD PPP) (figure 4.6) OECD_wealth <- read.csv(here("wealth means and medians OECD 2017.csv"), header=TRUE) OECD_wealth <- tbl_df(OECD_wealth) # emf(file = here("HILDA", "charts", "OECD_wealth_means_and_medians.emf"), width = 5.90551, height = 3.14961) OECD_wealth %>% mutate(highlight = ifelse(Country=='Australia', 1, 0)) %>% arrange(Average) %>% mutate(one = 1, counting = cumsum(one)) %>% ggplot() + geom_col(aes(x = reorder(Country, desc(counting)), y=Average, fill=as.factor(highlight)), position="dodge") + geom_point(aes( x = reorder(Country, desc(counting)), y=Median, color = as.factor(highlight)), size=3, shape=18) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.2), legend.position='none', plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"))+ scale_fill_manual(values=PCcols)+ scale_color_manual(values=c(black,grey))+ scale_y_continuous(name="US Dollars (PPP)", labels=comma, expand = c(0, 0)) + xlab("Country") # dev.off() # white writing - to go in presentations on dark background, just average # emf(file = here("HILDA", "charts", "OECD_wealth_means_and_medians_presentation.emf"), width = 5.90551, height = 3.14961) OECD_wealth %>% mutate(highlight = ifelse(Country=='Australia', 1, 0)) %>% arrange(Average) %>% mutate(one = 1, counting = cumsum(one)) %>% ggplot() + geom_col(aes(x = reorder(Country, desc(counting)), y=Average, fill=as.factor(highlight)), position="dodge") + # geom_point(aes( x = reorder(Country, desc(counting)), y=Median, color = as.factor(highlight)), size=3, shape=18) + theme( axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.2, colour=white), # plot.background = element_rect(fill = dark_grey), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white), legend.position='none', plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"))+ scale_fill_manual(values=PCcols)+ scale_color_manual(values=c(black,grey))+ scale_y_continuous(name="US Dollars (PPP)", labels=comma, expand = c(0, 0)) + xlab("Country") # dev.off() ############### # Figure 4.7 - Credit Suisse ginis of wealth (figure 4.7) # read in data int_ginis <- read.csv(here("credit suisse wealth ginis 2017.csv"), header=TRUE) int_ginis <- tbl_df(int_ginis) # emf(file = here("HILDA", "charts", "international_wealth_ginis.emf"), width = 5.90551, height = 3.14961) int_ginis %>% ggplot(aes(x = reorder(country, desc(gini)), y=gini, col=as.factor(highlight) ) ) + geom_point(size=3) + geom_segment(aes(xend=reorder(country, desc(gini)), y=0, yend=gini))+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.2), legend.position='none', plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"))+ scale_color_manual(values=PCcols)+ scale_y_continuous(limits = c(0,1), expand = c(0,0))+ xlab("Country") + ylab("Gini coefficient") # dev.off() # white writing - to go in presentations on dark background # emf(file = here("HILDA", "charts", "international_wealth_ginis_presentation.emf"), width = 5.90551, height = 3.14961) int_ginis %>% ggplot(aes(x = reorder(country, desc(gini)), y=gini, col=as.factor(highlight) ) ) + geom_point(size=3) + geom_segment(aes(xend=reorder(country, desc(gini)), y=0, yend=gini))+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.2, colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white), legend.position='none', plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"))+ scale_color_manual(values=PCcolsPres)+ scale_y_continuous(limits = c(0,1), expand = c(0,0))+ xlab("Country") + ylab("Gini coefficient") # dev.off() ########################################################################## # Section 4.2 - Components of wealth ########### # (in text) percentage of unequivalised wealth in owner occupied housing and super. (HILDA checking HES) dat %>% filter(year==2014) %>% select(hh_wt, hh_home_equity, hh_super, hh_wealth) %>% summarise(perc_home = (sum(hh_wt*hh_home_equity))/(sum(hh_wt*hh_wealth)), perc_super = (sum(hh_wt*hh_super))/(sum(hh_wt*hh_wealth))) ################ # Figure 4.8 - Average wealth by wealth typea, for each wealth decile - In dollars (top panel) and as a share of average total wealth (bottom panel) (figure 4.8) dat %>% filter(year==2014) %>% select(year, hhwt, wealth_dec, eq_home_equity, eq_super, eq_other_wealth) %>% gather(asset_type, value, -year, -hhwt, -wealth_dec) %>% group_by(asset_type, wealth_dec) %>% summarise(group_total = wtd.mean(value, hhwt) ) %>% ggplot(aes(x=as.factor(wealth_dec), y=group_total, fill = asset_type, group = asset_type))+ geom_col(position="stack") + scale_y_continuous(expand=c(0,0), label=comma)+ scale_x_discrete(expand=c(0.01, 0.01))+ scale_fill_manual(values=PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), axis.title.y = element_blank(), legend.position = "right", legend.title.align = 0.5, legend.title = element_text(face = "bold")) + labs(x = "Wealth decile", fill = "Wealth type") dat %>% filter(year==2014) %>% select(year, hhwt, wealth_dec, eq_home_equity, eq_other_property_equity, eq_super, eq_business_equity, eq_financial_wealth, eq_vehicles, eq_personal_wealth) %>% gather(asset_type, value, -year, -hhwt, -wealth_dec) %>% group_by(asset_type, wealth_dec) %>% summarise(group_total = wtd.mean(value, hhwt) ) %>% ggplot(aes(x=as.factor(wealth_dec), y=group_total, fill = asset_type, group = asset_type))+ geom_col(position="fill") + scale_y_continuous(expand=c(0,0), label=comma)+ scale_x_discrete(expand=c(0.01, 0.01))+ scale_fill_manual(values=PCcols)+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), axis.title.y = element_blank(), legend.position = "right", legend.title.align = 0.5, legend.title = element_text(face = "bold")) + labs(x = "Wealth decile", fill = "Wealth type") ################### #Figure 4.9 - Dollar change in wealth by wealth decile and wealth type, 2003-04 to 2015-16 (figure 4.9) - housing and super have driven growth in hosuehold wealth # emf(file = here("HES","Charts","dollar_change_in_wealth_type_by_wealth_dec.emf"), width = 5.90551, height = 3.14961) dat %>% select(year, hhwt, wealth_dec, eq_home_equity, eq_other_property_equity, eq_super, eq_business_equity, eq_financial_wealth, eq_vehicles, eq_personal_wealth) %>% filter(year==2014 | year==2002) %>% gather(wealth_type,value,c(-year,-wealth_dec, -hhwt)) %>% group_by(year, wealth_type, wealth_dec) %>% summarise(mean_value=wtd.mean(value, weight = hhwt)) %>% ungroup() %>% arrange(wealth_dec,wealth_type,year) %>% mutate(diff=mean_value-lag(mean_value,1)) %>% filter(year==2014) %>% ggplot(aes(x = wealth_type, y = diff, group = wealth_dec, fill = wealth_dec)) + geom_col(position="dodge")+ scale_y_continuous(labels = comma, expand=c(0.01,0.01), limits = c(-100000, 275000), breaks = seq(-100000,250000,50000))+ scale_x_discrete(expand = c(0.01, 0.01))+ labs(x = "Type of wealth", fill = "Wealth decile:")+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text(), axis.line.x = element_blank(), axis.title.y = element_blank())+ geom_hline(yintercept = 0, colour = grey)+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) # dev.off() #################################################################### # SECTION 4.3 ######### # Figure 4.10 - income and wealth deciles JOINT DISTRIBUTION (2014-15 for comparison, most recetn HILDA wealth year) (Figure 4.10) inc_wealth_prop <- as.data.frame(prop(wtd.table(dat$inc_dec[dat$year == 2014], dat$wealth_dec[dat$year == 2014], dat$hhwt[dat$year == 2014]))) colnames(inc_wealth_prop) <- c("inc_dec", "wealth_dec", "per_cent") inc_wealth_prop <- inc_wealth_prop %>% filter(inc_dec!="Total", wealth_dec!="Total") inc_wealth_prop %>% ggplot(aes(x = factor(wealth_dec), y = per_cent, fill = as.numeric(inc_dec), group = factor(inc_dec))) + geom_col(position = "dodge")+ scale_y_continuous(expand=c(0,0))+ scale_x_discrete(expand=c(0.1, 0.01))+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = c(0.5,0.9), legend.title.align = 0.5, legend.title = element_text(face = "bold")) + labs(x = "Wealth decile", y = "Per cent", fill = "Disposable income decile")+ PCcols_gradient+ guides(fill=guide_legend(nrow=1, byrow=TRUE))+ geom_hline(yintercept = 1, colour = grey, size = 0.5, linetype = "dashed") ################# # Figure 4.11 (a) INCOME AND PRIVATE (not final) CONSUMPTION MEANS by wealth decile (have to do wealth separately because of scale) - (Figure 4.11 (a) and (b)) dat %>% filter(year==2014) %>% select(hh_wt, wealth_dec, eq_disp_inc, eq_cons) %>% gather(key="measure", value="value", -hh_wt, -wealth_dec) %>% group_by(wealth_dec, measure) %>% summarise(wtd_avg = wtd.mean(value, hh_wt)) %>% ggplot(aes(x=wealth_dec,y=wtd_avg,group=factor(measure),fill=factor(measure))) + geom_col(position ="dodge")+ facet_grid(. ~ measure) + scale_y_continuous(labels=comma,name="average income/consumption", breaks=seq(0,250000,10000))+ scale_x_continuous(breaks=seq(1,10,1), name="Wealth decile")+ theme(legend.position = "bottom",legend.title=element_blank()) + theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none", axis.title.y=element_blank()) + scale_fill_manual(values = PCcols) # Figure 4.11 (b)- wealth dat %>% filter(year==2014) %>% select(hh_wt, wealth_dec, eq_wealth) %>% gather(key="measure", value="value", -hh_wt, -wealth_dec) %>% group_by(wealth_dec, measure) %>% summarise(wtd_avg = wtd.mean(value, hh_wt)) %>% ggplot(aes(x=wealth_dec,y=wtd_avg,group=factor(measure),fill=factor(measure))) + geom_col(position ="dodge")+ facet_grid(. ~ measure) + scale_y_continuous(labels=comma,name="average income/consumption", breaks=seq(0,2500000,500000))+ scale_x_continuous(breaks=seq(1,10,1), name="Wealth decile")+ theme(legend.position = "bottom",legend.title=element_blank()) + theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.position = "none", axis.title.y=element_blank()) + scale_fill_manual(values = PCcols) ############## # Figure 4.12 (a and b) - share of age group in each income and wealth decile # Figure 4.12 (a) wealth deciles dat %>% filter(year==2014) %>% group_by(wealth_dec, age_group_string) %>% summarise(pop = sum(hhwt)) %>% ungroup() %>% group_by(age_group_string) %>% mutate(percent = pop/sum(pop)) %>% ggplot(aes(x=factor(age_group_string), y=percent*100, fill=wealth_dec, group=wealth_dec ) ) + # geom_hline(yintercept = 10)+ geom_col(position = "dodge")+ scale_x_discrete(expand = c(0.1, 0.1))+ scale_y_continuous(expand=c(0,0), limits = c(0,25))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text()) + labs(x = "Age group", y = "Per cent", fill = "Wealth decile")+ geom_hline(yintercept = 10, colour = grey, size = 0.5, linetype = "dashed")+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) # Figure 4.12 (b) income deciles dat %>% filter(year==2016) %>% group_by(inc_dec, age_group_string) %>% summarise(pop = sum(hhwt)) %>% ungroup() %>% group_by(age_group_string) %>% mutate(percent = pop/sum(pop)) %>% ggplot(aes(x=factor(age_group_string), y=percent*100, fill=inc_dec, group=inc_dec ) ) + # geom_hline(yintercept = 10)+ geom_col(position = "dodge")+ scale_x_discrete(expand = c(0.1, 0.1))+ scale_y_continuous(expand=c(0,0), limits = c(0,25))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text()) + labs(x = "Age group", y = "Per cent", fill = "Income decile")+ geom_hline(yintercept = 10, colour = grey, size = 0.5, linetype = "dashed")+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) # Figure 4.13 - Average equivalised wealth by age and birth decade - uses HILDA longitudinal ############ # Figure 4.14 (a) - group by household type and show proportion in each decile - WEALTH - Figure 4.14 (a) dat %>% filter(year==2014) %>% group_by(wealth_dec, household_type) %>% summarise(pop = sum(hhwt)) %>% ungroup() %>% complete(wealth_dec,household_type) %>% group_by(household_type) %>% mutate(percent = pop/sum(pop, na.rm=TRUE)) %>% ggplot(aes(x = household_type, y=percent*100, fill=wealth_dec, group=wealth_dec) ) + # geom_hline(yintercept = 10)+ geom_col(position = "dodge")+ scale_x_discrete(expand = c(0.1, 0.1), labels = household_type_labels_spaced)+ scale_y_continuous(expand=c(0,0))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text()) + labs(x = "Household type", y = "Per cent", fill = "Wealth decile:")+ geom_hline(yintercept = 10, colour = grey, size = 0.5, linetype = "dashed") # Figure 4.14 (b) - group by household type and show proportion in each decile - INCOME - Figure 4.14 (b) dat %>% group_by(year, inc_dec, household_type) %>% summarise(pop = sum(hhwt)) %>% ungroup() %>% group_by(year, household_type) %>% mutate(percent = pop/sum(pop)) %>% filter(year==2014) %>% ggplot(aes(x = household_type, y=percent*100, fill=inc_dec, group=inc_dec)) + # geom_hline(yintercept = 10)+ geom_col(position = "dodge")+ scale_x_discrete(expand = c(0.1, 0.1), labels = household_type_labels_spaced)+ scale_y_continuous(expand=c(0,0), breaks = seq(0,60,10))+ PCcols_gradient+ theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm"), legend.title = element_text()) + labs(x = "Household type", y = "Per cent", fill = "Income decile:")+ geom_hline(yintercept = 10, colour = grey, size = 0.5, linetype = "dashed")+ guides(fill=guide_legend(nrow=1, byrow=TRUE)) # check HES result in HILDA (in text) # Retiree households who receive pension payments typically have low incomes, although many have substantial wealth. More than half of this wealth is in the form of owner occupied housing dat %>% filter(year==2014) %>% select(household_type, eq_wealth, eq_home_equity, hhwt) %>% group_by(household_type) %>% summarise(avg_wealth = wtd.mean(eq_wealth, hhwt) , avg_home_wealth = wtd.mean(eq_home_equity, hhwt)) %>% mutate(percent = avg_home_wealth/avg_wealth) ############################################## # Chapter 5 - MOBILITY ############################################ ######## # earnings elasticities - grouped by son's position (figure 5.2) earning <- read.csv(here("shared_dataframes", "earnings_elasticities.csv"), header=TRUE) earning <- earning %>% gather(key="son", value="probability", -dad_percentile) # emf(file = here("HILDA", "charts", "earnings_elasticities.emf"), width = 5.90551, height = 3.14961) earning %>% ggplot(aes(x=son, y=probability, fill=dad_percentile)) + geom_col(position="dodge") + theme( legend.title = element_text("Father's earnings percentile" ), legend.position = "top" ) + scale_y_continuous(name="Probability", breaks=seq(0,70,10), expand = c(0.0001, 0.0001))+ scale_x_discrete(name="Son's position in earnings distribution", labels=son_position_labels, expand = c(0.02, 0.02)) + scale_fill_manual(values= PCcols,labels=father_percentile_labels, name="Father's earnings percentile:") + theme(plot.margin = unit(c(0.13,0.25,0.13,0.25), "cm") ) # dev.off() ########## # median deciles by age - median person (figure 5.7) # create joint dataframe with medians for plotting as facet with median person's decile for income, wealth and consumption med_dec <- dat %>% filter(age<90) %>% group_by(year, age) %>% summarise(med_inc_dec = wtd.quantile(inc_dec, weights = hh_wt, probs=0.5) ) %>% ungroup() %>% group_by(age) %>% summarise(middle_inc = median(med_inc_dec)) med_dec_wealth <- dat %>% filter(year %in% wealth_years) %>% filter(age<90) %>% group_by(year, age) %>% summarise(med_wealth_dec = wtd.quantile(wealth_dec, weights = hh_wt, probs=0.5) )%>% ungroup() %>% group_by(age) %>% summarise(middle_wealth = median(med_wealth_dec)) # join to other dataframe and remove med_dec <- left_join(med_dec, med_dec_wealth, by="age") rm(med_dec_wealth) med_dec_cons <- dat_cons %>% filter(age<90) %>% group_by(age) %>% summarise(med_cons_dec = wtd.quantile(cons_dec, weights = hh_wt, probs=0.5) ) %>% ungroup() %>% group_by(age) %>% summarise(middle_cons = median(med_cons_dec)) # join to other dataframe and remove med_dec <- left_join(med_dec, med_dec_cons, by="age") rm(med_dec_cons) ## income # emf(file = here("HILDA", "charts", "med_dec_inc.emf"), width = 5.90551, height = 2.3) dat %>% filter(age >= 15 & age<=85) %>% group_by(year, age) %>% summarise(med_inc_dec = wtd.quantile(inc_dec, weights = hh_wt, probs=0.5) ) %>% ungroup() %>% group_by(age) %>% summarise(middle_inc = median(med_inc_dec)) %>% ggplot(aes(x=age, y=middle_inc)) + geom_point(col=blue) + scale_y_continuous(breaks = seq(0,10,2), limits = c(0,10), name="Median income decile") + scale_x_continuous(breaks = seq(0,90,10), limits = c(15,85), name="Age" ) # dev.off() ## income for presentation on a dark background # emf(file = here("HILDA", "charts", "med_dec_inc_presentation.emf"), width = 5.90551, height = 2.3) dat %>% filter(age >= 15 & age<=85) %>% group_by(year, age) %>% summarise(med_inc_dec = wtd.quantile(inc_dec, weights = hh_wt, probs=0.5) ) %>% ungroup() %>% group_by(age) %>% summarise(middle_inc = median(med_inc_dec)) %>% ggplot(aes(x=age, y=middle_inc)) + geom_point(col=blue) + scale_y_continuous(breaks = seq(0,10,2), limits = c(0,10), name="Median income decile") + scale_x_continuous(breaks = seq(0,90,10), limits = c(15,85), name="Age" ) + theme(axis.text.x = element_text(colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white)) # dev.off() ### wealth # emf(file = here("HILDA", "charts", "med_dec_wealth.emf"), width = 5.90551, height = 2.3) dat %>% filter(year %in% wealth_years) %>% filter(age>=15 & age<=85) %>% group_by(year, age) %>% summarise(med_wealth_dec = wtd.quantile(wealth_dec, weights = hh_wt, probs=0.5) )%>% ungroup() %>% group_by(age) %>% summarise(middle_wealth = median(med_wealth_dec)) %>% ggplot(aes(x=age, y=middle_wealth)) + geom_point(col=dark_blue) + scale_y_continuous(breaks = seq(0,10,2), limits = c(0,10), name="Median wealth decile") + scale_x_continuous(breaks = seq(0,90,10), limits = c(15,85), name="Age" ) # dev.off() # wealth - PC green and white writing for presentation on a dark background # emf(file = here("HILDA", "charts", "med_dec_wealth_presentation.emf"), width = 5.90551, height = 2.3) dat %>% filter(year %in% wealth_years) %>% filter(age>=15 & age<=85) %>% group_by(year, age) %>% summarise(med_wealth_dec = wtd.quantile(wealth_dec, weights = hh_wt, probs=0.5) )%>% ungroup() %>% group_by(age) %>% summarise(middle_wealth = median(med_wealth_dec)) %>% ggplot(aes(x=age, y=middle_wealth)) + geom_point(col=PC_green) + scale_y_continuous(breaks = seq(0,10,2), limits = c(0,10), name="Median wealth decile") + scale_x_continuous(breaks = seq(0,90,10), limits = c(15,85), name="Age" ) + theme(axis.text.x = element_text(colour=white), axis.text.y = element_text(colour=white), axis.title.x = element_text(colour=white), axis.title.y = element_text(colour=white)) # dev.off() ## consumption # emf(file = here("HILDA", "charts", "med_dec_cons.emf"), width = 5.90551, height = 2.3) dat_cons %>% filter(age>=15 & age<=85) %>% group_by(year, age) %>% summarise(med_cons_dec = wtd.quantile(cons_dec, weights = hh_wt, probs=0.5) ) %>% ungroup() %>% group_by(age) %>% summarise(middle_cons = median(med_cons_dec)) %>% ggplot(aes(x=age, y=middle_cons)) + geom_point(col=PC_green) + scale_y_continuous(breaks = seq(0,10,2), limits = c(0,10), name="Median consumption decile") + scale_x_continuous(breaks = seq(0,90,10), limits = c(15,85), name="Age" ) # dev.off() ####################################################################### # CHAPTER 6 - POVERTY ####################################################################### ########## Need to do this stuff first for poverty ##Indicator variable for Headey definition of financial poverty (using liquid assets less than 25% of poverty lines as filter) dat <- dat %>% group_by(year) %>% mutate(fin_poverty_Headey_liquid_assets = ifelse(year %in% wealth_years & year %in% cons_years, ifelse(eq_disp_inc < (0.5 * weighted.quantile(eq_disp_inc, w = hhwt, probs = 0.5)) & eq_cons < (0.5 * weighted.quantile(eq_cons, w = hhwt, probs = 0.5)) & eq_liquid_assets < (0.25 * 0.5 * weighted.quantile(eq_disp_inc, w = hhwt, probs = 0.5)), 1 ,0), NA) ) # Consumption poverty (no in kind transfers in HILDA) dat <- dat %>% group_by(year) %>% mutate(cons_no_inkind_poverty = ifelse(year %in% cons_years, ifelse(eq_cons < (0.5 * wtd.quantile(eq_cons, w = hhwt, probs = 0.5) ), 1 ,0), NA) ) #Variable for half of median cons (no in kind in HILDA) dat <- dat %>% group_by(year) %>% mutate(cons_no_inkind_poverty_line = ifelse(year %in% cons_years, 0.5 * wtd.quantile(eq_cons, w = hhwt, probs=0.5), NA)) ########### Can go bit-by-bit from here (in poverty section) ################### # Figure 6.1 - Plot of percentage in poverty by year (figure 6.1) #Per cent in poverty - check against HES ################ (note that consumption and wealth are for current year, but income is for previous financial year) perc_poverty <- dat %>% group_by(year) %>% summarise(income = weighted.mean(inc_poverty, w = hhwt), cons_no_in_kind = weighted.mean(cons_no_inkind_poverty, w = hhwt), # can't do final consumption in HILDA (no in kind) financial_Headey_liquid_assets = weighted.mean(fin_poverty_Headey_liquid_assets, w = hhwt)) perc_poverty %>% gather(key = "variable", value = "percent",-year) %>% filter(variable == "income" | variable == "cons_no_in_kind" | variable == "financial_Headey_liquid_assets") %>% ggplot(aes(x=year,y=percent*100,col=variable)) + geom_line(size=1)+ geom_point(size=3, shape=16)+ scale_y_continuous(name="Per cent")+ scale_x_continuous(name = "", breaks = HES_years, labels = inc_fin_year_labels)+ scale_color_manual(values=PCcols, labels = poverty_labels) # figure 6.2 - can't match anchored poverty in HILDA as doesn't go back far enough ################# # Figure 6.3 - Average relative povery gaps (dollars and per cent) # - to check against HES (note that consumption and wealth are for current year, but income is for previous financial year) #Variable for half of median disposable income dat <- dat %>% group_by(year) %>% mutate(inc_poverty_line = 0.5 * wtd.quantile(eq_disp_inc, w = hhwt, probs=0.5)) #Variable for half of median consumption (no inkind) - not possible prior to 2006 (no consumption data) dat <- dat %>% group_by(year) %>% mutate(cons_no_inkind_poverty_line = ifelse(year<2006,-99999, 0.5 * wtd.quantile(eq_cons, w = hhwt, probs=0.5) ) ) #Mean poverty gap (dollars and percent) mean_poverty_gap_dollars <- dat %>% group_by(year) %>% summarise(income = weighted.mean(inc_poverty_line - eq_disp_inc, w = hhwt * inc_poverty), cons_no_in_kind = weighted.mean(cons_no_inkind_poverty_line - eq_cons, w = hhwt * cons_no_inkind_poverty)) mean_poverty_gap_percent <- dat %>% group_by(year) %>% summarise(income = weighted.mean((inc_poverty_line - eq_disp_inc) / inc_poverty_line, w = hhwt * inc_poverty), cons_no_in_kind = weighted.mean((cons_no_inkind_poverty_line - eq_cons) / cons_no_inkind_poverty_line, w = hhwt * cons_no_inkind_poverty)) # Plot of mean poverty gaps by year (dollars) (figure 6.3 a) mean_poverty_gap_dollars %>% gather(key = "variable", value = "value",-year) %>% ggplot(aes(x=year,y=value,col=variable)) + geom_line(size=1)+ geom_point(size=3, shape=16)+ scale_y_continuous(name="$ (2016-17)", labels = comma, limits = c(9,10000))+ scale_x_continuous(name = "", breaks = HES_years, labels = inc_fin_year_labels)+ scale_color_manual(values=PCcols) #Plot of mean poverty gaps by year (percent) (figure 6.3 b) mean_poverty_gap_percent %>% gather(key = "variable", value = "percent",-year) %>% ggplot(aes(x=year,y=percent*100,col=variable)) + geom_line(size=1)+ geom_point(size=3, shape=16)+ scale_y_continuous(name="Per cent", limits = c(0,80))+ scale_x_continuous(name = "", breaks = HES_years, labels = inc_fin_year_labels)+ scale_color_manual(values=PCcols) # Figure 6.4 - This chart done with HILDA data in excel ################ # Figure 6.5 and 6.6 #Per cent in poverty by household type perc_poverty_by_household <- dat %>% group_by(year, household_type_poverty) %>% summarise(income = weighted.mean(inc_poverty, w = hhwt), cons_no_inkind = weighted.mean(cons_no_inkind_poverty, w = hhwt)) # Figure 6.5 perc_poverty_by_household %>% filter(year==2016) %>% gather(key="variable", value="percent", -household_type_poverty, -year) %>% ggplot(aes(x=household_type_poverty, y=percent, fill=variable)) + geom_col(position="dodge") # Figure 6.6 - panel a perc_poverty_by_household %>% filter(year %in% cons_years) %>% ggplot(aes(x=year, y=cons_no_inkind, col=household_type_poverty) ) + geom_line() + geom_point() # Figure 6.6 - panel b perc_poverty_by_household %>% ggplot(aes(x=year, y=income, col=household_type_poverty) ) + geom_line() + geom_point() #################### # Figure 6.7 - income poverty rates by age group # Per cent in poverty by age group inc_poverty_by_age_group <- dat %>% filter(year %in% HES_years) %>% group_by(year, age_group) %>% summarise(HILDA_inc_poverty_rate = sum(inc_poverty*hhwt) / sum(hhwt) ) %>% arrange(age_group) inc_poverty_by_age_group %>% ggplot(aes(x=year, y=HILDA_inc_poverty_rate, col=age_group)) + geom_line() + geom_point() #################### # Figure 6.8 - consumption poverty rates by age group cons_no_inkind_poverty_by_age_group <- dat %>% filter(year %in% HES_years & year>=2006) %>% group_by(year, age_group) %>% summarise(cons_no_inkind_poverty_rate = sum(cons_no_inkind_poverty*hhwt) / sum(hhwt) ) %>% arrange(age_group) cons_no_inkind_poverty_by_age_group %>% filter(year %in% cons_years) %>% ggplot(aes(x=year, y=cons_no_inkind_poverty_rate, col=age_group)) + geom_line() + geom_point() ################################################################### # MATERIAL DEPRIVATION (IN POVERTY CHAPTER) ################################################################ # 1. Filter to include only the 2014 wave # # 2. select only relevant variables, turn factor variables into integers and adjust to base 0 # mat_deprivation <- dat %>% filter(year==2014) %>% dplyr::select(hh_id, hh_wt, rp_wt, inc_poverty, starts_with("mdha")) mat_deprivation <- mat_deprivation %>% mutate(mdschoolclothes = as.integer(mdhacbk)-10, mdchilddentist = as.integer(mdhacdc)-10, mdchildhobby = as.integer(mdhacho)-10, mdchildbeds = as.integer(mdhacsb)-10, mdchildschooltrip = as.integer(mdhacst)-10, mddentist = as.integer(mdhadt)-10, mdfurniture = as.integer(mdhafu)-10, mdcatchup = as.integer(mdhagt)-10, mdheating = as.integer(mdhahaw)-10, mdhcinsurance = as.integer(mdhahci)-10, mdmedical = as.integer(mdhamt)-10, mdvehicle = as.integer(mdhamv)-10, mdcarinsure = as.integer(mdhamvi)-10, mdcphone = as.integer(mdhaph)-10, mdscripts = as.integer(mdhapm)-10, mdroof = as.integer(mdharg)-10, mdsavings = as.integer(mdhasa)-10, mdsecure = as.integer(mdhash)-10, mdlocks = as.integer(mdhasl)-10, mdfood = as.integer(mdhasm)-10, mdwarmclothes = as.integer(mdhawc)-10, mdwasher = as.integer(mdhawm)-10) # 3. mutate each new material deprivation column to binary, where 1 = deprived # mat_deprivation <- mat_deprivation %>% mutate(mdschoolclothes1 = ifelse(mdschoolclothes == 1, 1, 0), mdchilddentist1 = ifelse(mdchilddentist == 1, 1, 0), mdchildhobby1 = ifelse(mdchildhobby == 1, 1, 0), mdchildbeds1 = ifelse(mdchildbeds == 1, 1, 0), mdchildschooltrip1 = ifelse(mdchildschooltrip == 1, 1, 0), mddentist1 = ifelse(mddentist == 1, 1, 0), mdfurniture1 = ifelse(mdfurniture == 1, 1, 0), mdcatchup1 = ifelse(mdcatchup == 1, 1, 0), mdheating1 = ifelse(mdheating == 1, 1, 0), mdhcinsurance1 = ifelse(mdhcinsurance == 1, 1, 0), mdmedical1 = ifelse(mdmedical == 1, 1, 0), mdvehicle1 = ifelse(mdvehicle == 1, 1, 0), mdcarinsure1 = ifelse(mdcarinsure == 1, 1, 0), mdcphone1 = ifelse(mdcphone == 1, 1, 0), mdscripts1 = ifelse(mdscripts == 1, 1, 0), mdroof1 = ifelse(mdroof == 1, 1, 0), mdsavings1 = ifelse(mdsavings == 1, 1, 0), mdsecure1 = ifelse(mdsecure == 1, 1, 0), mdlocks1 = ifelse(mdlocks == 1, 1, 0), mdfood1 = ifelse(mdfood == 1, 1, 0), mdwarmclothes1 = ifelse(mdwarmclothes == 1, 1, 0), mdwasher1 = ifelse(mdwasher == 1, 1, 0)) # 4. mutate new column for total deprivation score: sum of all the binary columns created above # # (I couldn't figure out how to do this shorthand i.e. by summing across the range of columns, so had to write them all out) # mat_deprivation <- mat_deprivation %>% mutate(mdtotal = mdschoolclothes1 + mdchilddentist1 + mdchildhobby1 + mdchildbeds1 + mdchildschooltrip1 + mddentist1 + mdfurniture1 + mdcatchup1 + mdheating1 + mdhcinsurance1 + mdmedical1 + mdvehicle1 + mdcarinsure1 + mdcphone1 + mdscripts1 + mdroof1 + mdsavings1 + mdsecure1 + mdlocks1 + mdfood1 + mdwarmclothes1 + mdwasher1) # 5. mutate new binary columns for "deprived of 2+ essentials" and "deprived of 3+ essentials" # mat_deprivation <- mat_deprivation %>% mutate(deprived2 = ifelse(mdtotal > 1, 1, 0), deprived3 = ifelse(mdtotal > 2, 1, 0)) View(mat_deprivation) # 6. create new tbl only for respondents in income poverty and cut out intermediate variables (for determining deprivation score) # poverty_dep <- mat_deprivation %>% select(hh_id, hh_wt, rp_wt, inc_poverty, mdtotal, deprived2, deprived3) %>% filter(inc_poverty == 1) View(poverty_dep) # 7. Calculate average deprivation score for people in income poverty # poverty_dep %>% summarise(mean_ds = wtd.mean(mdtotal, weights=hh_wt)) ### Result: 1.12 ### # 8. calculate proportion of people in income poverty who are deprived of >=2 and >=3 essentials # poverty_dep %>% summarise(perc_deprived2 = wtd.mean(deprived2, weights=hh_wt)) ### Result: 29.1% ### poverty_dep %>% summarise(perc_deprived3 = wtd.mean(deprived3, weights=hh_wt)) ### Result: 18.7% ### # 9. create new tbl only for respondents who are deprived of >=2 essentials and cut out intermediate variables # deprivation_pov <- mat_deprivation %>% select(hh_id, hh_wt, rp_wt, inc_poverty, mdtotal, deprived2, deprived3) %>% filter(deprived2 == 1) View(deprivation_pov) # 10. Calculate proportions of 2-deprived and 3-deprived people who are in income poverty # deprivation_pov %>% summarise(dep2_poverty = wtd.mean(inc_poverty, weights=hh_wt)) ### Result: 24.9% ### deprivation_pov <- deprivation_pov %>% filter(deprived3 == 1) deprivation_pov %>% summarise(dep3_poverty = wtd.mean(inc_poverty, weights=hh_wt)) ### Result: 28.4% ### # 11. Finally, calculate what % of all people are both in income poverty AND 2-deprived (and then 3-deprived) # mat_deprivation <- mat_deprivation %>% mutate(dep2_plus_poverty = ifelse(deprived2 == 1 & inc_poverty == 1, 1, 0), dep3_plus_poverty = ifelse(deprived3 == 1 & inc_poverty == 1, 1, 0)) mat_deprivation %>% summarise(perc_overlap2 = wtd.mean(dep2_plus_poverty, weights=hh_wt)) ### Result: 2.9% ### mat_deprivation %>% summarise(perc_overlap3 = wtd.mean(dep3_plus_poverty, weights=hh_wt)) ### Result: 1.9% ###