######################################################################### # Direct Small Area Estimation # Use library laeken for point and variance estimation at domain level # Produce maps of the estimates # Model-based estimation of averages using area level and unit level model with the sae package # Model-based of non-linear indicators using the emdi package # Applications to income data using the EU-SILC survey ######################################################################### rm(list=ls()) setwd(paste0(R.home(), "/../workshop")) # Loading libraries and the data library(laeken) library(emdi) library(colorRamps) library(gridExtra) library(simFrame) library(dplyr) library(sae) library(ggplot2) # Datasets data("eusilc") data("eusilcP") data("eusilcA_pop") data("eusilcA_smp") # Head Count Ratio with survey weights hcr_national<-arpr("eqIncome", weights = "rb050", data = eusilc) # Point and Variance estimation - At-risk-of-poverty rate hcr_nuts2<- arpr("eqIncome", weights = "rb050", breakdown = "db040", data = eusilc) hcr_var<-variance("eqIncome", weights = "rb050", breakdown = "db040", design = "db040", data = eusilc, indicator = hcr_nuts2, bootType = "naive", seed = 123,R=500) # HCR - breakdown by NUTS2 and gender groups eusilc$genderregion<-interaction(eusilc$db040,eusilc$rb090) hcr_nuts2_gender<-arpr("eqIncome", weights = "rb050", data = eusilc, breakdown="genderregion") hcr_nuts2_gender # Produce estimates of Quintile Share Ratio and visualise on a Map Karte <- readRDS("AUT_adm1.rds") Karte@data$NAME_1 <- c("Burgenland","Carinthia","Lower Austria","Upper Austria","Upper Austria", "Salzburg","Styria","Tyrol","Vorarlberg","Vorarlberg","Vienna") # Produce the map est_qsr<-qsr("eqIncome", weights = "rb050", data = eusilc,breakdown="db040") ind <- match(Karte@data$NAME_1,est_qsr$strata) # Matching Karte@data$QSR <- est_qsr$valueByStratum[ind,2] # Add estimator to map # Define the color of the maps ramp <- colorRamp(c("green","yellow", "red","black")) # Plot #pdf(file="Direct_austria_qsr.pdf", width=5, height=5, family="Times") spplot(Karte,c("QSR"),names.attr=c("QSR"),main="Quintile Share Ratio", col.regions=c(rgb(ramp(seq(0, 1, length = 200)), max = 255)),par.strip.text=list(lines=1.3), at=c(seq(min(Karte@data$QSR),max(Karte@data$QSR),length.out=100),Inf),colorkey=TRUE) #dev.off() # we filter by the main income holder per household to get household data eusilcP_HH <- eusilcP[eusilcP$main, ] # draw random rows in order to receive a sample from the population data set set.seed(2) sample_id <- sample(1:25000, 400, replace = FALSE, prob = NULL) # Receive sample on houehold level corresponding to population data eusilcS_HH <- eusilcP[sample_id, ] # How are the sample observations distributed in the regions? summary(eusilcS_HH$region) # Direct estimation of mean using sae-package fit_direct <- direct(y = eqIncome,dom = region, data = eusilcS_HH, replace=T) # Aggregation of the covariates on region level eusilcP_HH_agg<-tbl_df(eusilcP_HH) %>% group_by(region) %>% summarise(hy090n = mean(hy090n)) %>% ungroup() %>% mutate(Domain = fit_direct$Domain) data_frame <- left_join(eusilcP_HH_agg, fit_direct, by = "Domain") %>% mutate(var = SD^2) # Estimation of the FH-model fit_FH <- mseFH(formula = Direct ~ hy090n,vardir = var, data = as.data.frame(data_frame)) # Comparison CV of direct and FH FH_CV <- 100*sqrt(fit_FH$mse) / fit_FH$est$eblup results <- data.frame(fit_direct, FH_est = fit_FH$est$eblup, FH_CV) # Aggregation of the covariates on region level eusilcP_HH_agg<-tbl_df((eusilcP_HH))%>%group_by((region))%>%summarise(py010n=mean(py010n), py050n=mean(py050n), hy090n=mean(hy090n),n=n())%>% ungroup()%>%mutate(Domain=fit_direct$Domain) data_frame<-left_join(eusilcP_HH_agg,fit_direct,by="Domain")%>%mutate(var=SD^2) data_frame<-as.data.frame(data_frame) Xmean<-data.frame(region=data_frame[,1],hy090n=data_frame$hy090n,py010n=data_frame$py010n,py050n=data_frame$py050n) Popsize<-data.frame(region=data_frame[,1],n=data_frame$n) # Estimation of the Unit-level model (Battese-Harter-Fuller) fit_EBLUP<-eblupBHF(formula=as.numeric(eqIncome)~py010n + py050n+hy090n,dom=region,data=eusilcS_HH,meanxpop=Xmean,popnsize=Popsize) # MSE estimation of the Unit-level model MSE_EBLUP<-pbmseBHF(formula=as.numeric(eqIncome)~py010n + py050n+hy090n,dom=region,data=eusilcS_HH,meanxpop=Xmean,popnsize=Popsize) # Comparison with direct estimator EBLUP_CV<-100*sqrt(MSE_EBLUP$mse$mse)/fit_EBLUP$eblup$eblup results<-data.frame(fit_EBLUP$eblup,EBLUP_CV) # EBP estimation set.seed(1) #EBP with Box-Cox Transformation and bootstrap MSE emdi_model_box_cox <- ebp(fixed = eqIncome ~ gender + eqsize + cash + self_empl + unempl_ben + age_ben + surv_ben + sick_ben + dis_ben + rent + fam_allow + house_allow + cap_inv + tax_adj, pop_data = eusilcA_pop, pop_domains = "district", smp_data = eusilcA_smp, smp_domains = "district", threshold = 0.6*median(eusilcA_smp$eqIncome), transformation = "box.cox", L = 50, MSE = TRUE, B = 50,custom_indicator = list(my_max = function(y, threshold){max(y)}, my_min = function(y, threshold){min(y)}), na.rm = TRUE, cpus = 1) summary(emdi_model_box_cox) plot(emdi_model_box_cox) #Output of the results results=estimators(emdi_model_box_cox, indicator = "Median", MSE = TRUE) load_shapeaustria() # Create a suitable mapping table to use numerical identifiers of the shape # file # First find the right order dom_ord <- match(shape_austria_dis@data$PB, emdi_model_box_cox$ind$Domain) # Create the mapping table based on the order obtained above map_tab <- data.frame(pop_data_id = emdi_model_box_cox$ind$Domain[dom_ord], shape_id = shape_austria_dis@data$BKZ) # Create map plot for mean indicator - point and CV estimates but no MSE # using the numerical domain identifiers of the shape file map_plot(object = emdi_model_box_cox, MSE = FALSE, CV = FALSE, map_obj = shape_austria_dis, indicator = c("Median"), map_dom_id = "BKZ", map_tab = map_tab)