############################################################################ # R2MLWIN - A PROGRAM TO RUN THE MLWIN MULTILEVEL MODELLING SOFTWARE FROM # WITHIN R # # Zhengzheng Zhang, Chris Charlton, Richard Parker, George Leckie # and William Browne # Centre for Multilevel Modelling # University of Bristol # # http://www.bristol.ac.uk/cmm/software/r2mlwin/ # ############################################################################ ############################################################################ # 1. INTRODUCTION ############################################################################ ############################################################################ # 1.3 The r2mlwin package ############################################################################ # Install the R2MLwiN package from the Comprehensive R Archive Network # (CRAN) archive #install.packages("R2MLwiN") # Load the R2MLwiN package library(R2MLwiN) # Specify the file address for mlwin.exe on your computer as the global # option MLwiN_path options(MLwiN_path="C:/Program Files/MLwiN v3.05/") ############################################################################ # 2. CONTINUOUS RESPONSE MODELS ############################################################################ # Load the tutorial data into memory data(tutorial, package = "R2MLwiN") ############################################################################ # 2.1 Variance components model ############################################################################ # Fit a two-level students-within-schools variance components model to # students' age 16 scores (model1 <- runMLwiN(normexam ~ 1 + (1 | school) + (1 | student), estoptions=list(debugmode=TRUE), data=tutorial, stdout=NULL)) # Display the school-level VPC or ICC statistic b <- coef(model1) VPC <- b["RP2_var_Intercept"] / (b["RP2_var_Intercept"] + b["RP1_var_Intercept"]) names(VPC) <- "VPC" VPC # Re-fit the previous model , but do not pause in MLwiN (model1 <- runMLwiN(normexam ~ 1 + (1 | school) + (1 | student), data=tutorial, stdout=NULL)) ############################################################################ # 2.2 Random intercept model with predictor variables ############################################################################ # Generate a boys' school dummy variable tutorial$boysch = as.integer(tutorial$schgend=="boysch") # Generate a girls' school dummy variable tutorial$girlsch = as.integer(tutorial$schgend=="girlsch") tutorial$girl = as.integer(tutorial$sex=="girl") # Fit a two-level random intercept model for student age 16 scores with # predictor variables and retrieve the Empirical Bayes estimates of the # school random effects # (Save the estimation results under the name model2) (model2 <- runMLwiN(normexam ~ 1 + standlrt + girl + boysch + girlsch + (1 | school) + (1 | student), estoptions=list(resi.store=TRUE), data=tutorial, stdout=NULL)) # Test whether the boys' school effect is significantly different from the # girls' school effect if (!require(car)) install.packages("car"); library(car) linearHypothesis(model2, "FP_boysch = FP_girlsch") # Plot a quantile-quantile plot of the school residuals u0 <- model2@residual$lev_2_resi_est_Intercept qqnorm(u0) # Rank the school residuals u0se <- sqrt(model2@residual$lev_2_resi_var_Intercept) u0rank <- rank(u0) u0rankhi <- u0+u0se u0ranklo <- u0-u0se u0rankno <- order(u0rank) # Plot a caterpillar plot of the school residuals plot(1:65, u0[u0rankno], ylim=c(-1,1), pch=15, xlab="Rank",ylab="u0 residual estimate") points(1:65, u0rankhi[u0rankno], pch=24,bg="grey") points(1:65, u0ranklo[u0rankno], pch=25,bg="grey") for(i in 1:65) {lines(rep(i,2), c(u0ranklo[u0rankno[i]],u0rankhi[u0rankno[i]]))} # Count how many schools' residuals are significantly below zero sum(u0 + 1.96*u0se < 0) # Count how many schools' residuals are significantly above zero sum(u0 + 1.96*u0se > 0) # Drop the variables u0 and u0se rm(u0,u0se) ############################################################################ # 2.3 Random slope model ############################################################################ # Fit a two-level random slope model for student age 16 scores and retrieve # the empirical Bayes estimates of the school random effects # (Save the estimation results under the name model3) (model3 <- runMLwiN(normexam ~ 1 + standlrt + girl + boysch + girlsch + (1 + standlrt | school) + (1 | student), estoptions=list(resi.store=TRUE), data=tutorial, stdout=NULL)) # Display the correlation between the random intercepts and the random # slopes b <- coef(model3) corr <- b["RP2_cov_Intercept_standlrt"] / sqrt(b["RP2_var_Intercept"] + b["RP2_var_standlrt"]) names(corr) <- "corr" corr # Performs a likelihood-ratio test to compare this model to the previous # random intercept model if (!require(lmtest)) install.packages("lmtest"); library(lmtest) lrtest(model2, model3) # Display a table of model results for model2 and model3 if (!require(texreg)) install.packages("texreg"); library(texreg) screenreg(c(model2, model3)) # Generate predicted age 16 scores based only on age 11 scores and school # attended u0 <- model3@residual$lev_2_resi_est_Intercept u1 <- model3@residual$lev_2_resi_est_standlrt ypred = b["FP_Intercept"] + b["FP_standlrt"]*tutorial$standlrt + u0[tutorial$school] + u1[tutorial$school]*tutorial$standlrt # Plot the predicted age 16 scores against age 11 scores plot(model3@data$standlrt, ypred, type="n") for (i in 1:65) { lines(model3@data$standlrt[model3@data$school==i], ypred[model3@data$school==i], col="blue") } # Generate the predicted between-school variance for each student lev2var = b["RP2_var_Intercept"] + 2*b["RP2_cov_Intercept_standlrt"]*tutorial$standlrt + b["RP2_var_standlrt"]*tutorial$standlrt^2 # Plot the predicted between-school variance against age 11 scores plot(sort(tutorial$standlrt), lev2var[order(tutorial$standlrt)], type="l") ############################################################################ # 3. BINARY RESPONSE MODELS ############################################################################ # Load the Bangladeshi contraceptive use data into memory data(bang, package = "R2MLwiN") ############################################################################ # 3.1 Variance components model ############################################################################ # Fit a two-level women-within-districts variance components logit model by # PQL2 for women's contraceptive use (model4 <- runMLwiN(logit(use, cons) ~ 1 + (1 | district), D="Binomial", estoptions=list(nonlinear=c(N=1,M=2)), data=bang, stdout=NULL)) # Fit the previous model by MCMC using the PQL2 estimates from the previous # model as starting values for MCMC estimation. Model takes 12 seconds to # fit on our computer. (model5 <- runMLwiN(logit(use, cons) ~ 1 + (1 | district), D="Binomial", estoptions=list(EstM=1, startval=list(FP.b=model4@FP, FP.v=model4@FP.cov, RP.b=model4@RP, RP.v=model4@RP.cov)), data=bang, stdout=NULL)) b <- coef(model5) # Display the odds of using contraception in the average district exp(b["FP_Intercept"]) # Display the probability of using contraception in the average district exp(b["FP_Intercept"])/(1 + exp(b["FP_Intercept"])) # Display the district-level VPC and ICC b["RP2_var_Intercept"]/(b["RP2_var_Intercept"] + pi^2/3) # Plot a trajectories (trace) plot of the MCMC chains trajectories(model5) # Plot a fiveway MCMC graphical diagnostic plot sixway(model5@chains[, "RP2_var_Intercept",drop=FALSE]) # Display detailed MCMC summary statistics for the between-district variance summary(model5@chains[, "RP2_var_Intercept",drop=FALSE]) ############################################################################ # 3.2 Random intercept model with predictor variables ############################################################################ # Fit a two-level random intercept model for women's contraceptive use # adjusting for predictor variables by PQL2 (model6 <- runMLwiN(logit(use, cons) ~ 1 + age + lc + urban + educ + hindu + (1 | district), D="Binomial", estoptions=list(nonlinear=c(N=1,M=2)), data=bang, stdout=NULL)) # Fit the previous model by MCMC for a burnin of 1000 iterations followed # by a monitoring period of 10000 iterations. Model takes 72 seconds to fit # on our computer. (model7 <- runMLwiN(logit(use, cons) ~ 1 + age + lc + urban + educ + hindu + (1 | district), D="Binomial", estoptions=list(EstM=1, resi.store=TRUE, mcmcMeth=list(burnin=1000,iterations=10000), startval=list(FP.b=model6@FP, FP.v=model6@FP.cov, RP.b=model6@RP, RP.v=model6@RP.cov)), data=bang, stdout=NULL)) ############################################################################ # 3.3 Random slope models ############################################################################ # Quietly fit a two-level random slope model for contraceptive use by PQL2 model8 <- runMLwiN(logit(use, cons) ~ 1 + age + lc + urban + educ + hindu + (1 + urban | district), D="Binomial", estoptions=list(nonlinear=c(N=1,M=2)), data=bang, stdout=NULL) # Fit the previous model by MCMC using an orthogonal fixed effects # parameterisation to make the MCMC chains less autocorrelated. Model takes # 46 seconds to fit on our computer. (model9 <- runMLwiN(logit(use, cons) ~ 1 + age + lc + urban + educ + hindu + (1 + urban | district), D="Binomial", estoptions=list(EstM=1, resi.store=TRUE, mcmcOptions=list(orth=1), startval=list(FP.b=model8@FP, FP.v=model8@FP.cov, RP.b=model8@RP, RP.v=model8@RP.cov)), data=bang, stdout=NULL)) b <- coef(model9) # Display the between-district variance for women living in rural areas b["RP2_var_Intercept"] # Display the between-district variance for women living in urban areas b["RP2_var_Intercept"] + 2*b["RP2_cov_Intercept_urbanUrban"] + b["RP2_var_urbanUrban"] # Quietly fit the previous model by PQL2 where we add in the two additional # predictor variables model10 <- runMLwiN(logit(use, cons) ~ 1 + age + lc + urban + educ + hindu + d_lit + d_pray + (1 + urban | district), D="Binomial", estoptions=list(nonlinear=c(N=1,M=2)), data=bang, stdout=NULL) # Fit the previous model by MCMC. Model takes 52 seconds to fit on our # computer. (model11 <- runMLwiN(logit(use, cons) ~ 1 + age + lc + urban + educ + hindu + d_lit + d_pray+ (1 + urban | district), D="Binomial", estoptions=list(EstM=1, mcmcOptions=list(orth=1), startval=list(FP.b=model10@FP, FP.v=model10@FP.cov, RP.b=model10@RP, RP.v=model10@RP.cov)), data=bang, stdout=NULL)) b <- coef(model11) # Display the between-district variance for women living in rural areas b["RP2_var_Intercept"] # Display the between-district variance for women living in urban areas b["RP2_var_Intercept"] + 2*b["RP2_cov_Intercept_urbanUrban"] + b["RP2_var_urbanUrban"] ############################################################################ # 4. MORE ADVANCED MODELS ############################################################################ ############################################################################ # 4.1 Multivariate response model ############################################################################ # Load the tutorial data into memory data(tutorial, package = "R2MLwiN") tutorial$girl <- as.integer(tutorial$sex == "girl") # Generate a binary response variable equal to 1 if normexam is positive, # otherwise 0 tutorial$pass <- as.integer(tutorial$normexam > 0) # Quietly fit a two-level students-within-schools bivariate mixed response # type model to continuous age 11 scores and binary age 16 scores model12 <- runMLwiN(c(standlrt,probit(pass, cons)) ~ 1 + girl + (1 | school) + (1[1] | student), D=c("Mixed", "Normal", "Binomial"), data=tutorial, stdout=NULL) # Fit the previous model by MCMC. Model takes 239 seconds to fit on our # computer. (model13 <- runMLwiN(c(standlrt,probit(pass, cons)) ~ 1 + girl + (1 | school) + (1[1] | student), D=c("Mixed", "Normal", "Binomial"), estoptions=list(EstM=1, mcmcOptions=list(orth=1), startval=list(FP.b=model12@FP, FP.v=model12@FP.cov, RP.b=model12@RP, RP.v=model12@RP.cov)), data=tutorial, stdout=NULL)) b <- coef(model13) # Display the boy predicted probability of passing the age 16 exams b["FP_Intercept_2"] # Display the girl predicted probability of passing the age 16 exams pnorm(b["FP_Intercept_2"] + b["FP_girl_pass"]) ############################################################################ # 4.2 Ordinal response model ############################################################################ # Load the Bangladeshi contraceptive use data into memory data(bang, package = "R2MLwiN") # Set Not_using_contraception (4) as the base category # bang$use4 <- relevel(bang$use4, ref="Not_using_contraception") # Fit a two-level women-within-districts ordinal logit model by MQL1 for # contraceptive use model14 <- runMLwiN(logit(use4, cons, 4) ~ 1 + age[1:3] + lc[1:3] + urban[1:3] + (1[1:3] | district), D="Ordered Multinomial", data=bang, stdout=NULL) # Fit the previous model by MCMC. Model takes 62 seconds to fit on our # computer. (model15 <- runMLwiN(logit(use4, cons, 4) ~ 1 + age[1:3] + lc[1:3] + urban[1:3] + (1[1:3] | district), D="Ordered Multinomial", estoptions=list(EstM=1, mcmcOptions=list(orth=1), startval=list(FP.b=model14@FP, FP.v=model14@FP.cov, RP.b=model14@RP, RP.v=model14@RP.cov)), data=bang, stdout=NULL)) ############################################################################ # 4.3 Nominal response model ############################################################################ # Load the Bangladeshi contraceptive use data into memory data(bang, package = "R2MLwiN") # Set Not_using_contraception (4) as the base category bang$use4 <- relevel(bang$use4, ref="Not_using_contraception") # Fit a two-level women-within-districts nominal logit model by MQL1 for # contraceptive use model16 <- runMLwiN(logit(use4, cons) ~ 1 + age + lc + urban + (1 | district), D="Unordered Multinomial", data=bang, stdout=NULL) # Fit the previous model by MCMC. Model takes 171 seconds to fit on our # computer. (model17 <- runMLwiN(logit(use4, cons) ~ 1 + age + lc + urban + (1 | district), D="Unordered Multinomial", estoptions=list(EstM=1, resi.store=TRUE, mcmcOptions=list(orth=1), startval=list(FP.b=model16@FP, FP.v=model16@FP.cov, RP.b=model16@RP, RP.v=model16@RP.cov)), data=bang, stdout=NULL)) b <- coef(model17) # Generate the numerator of the predicted probability expression for the # predicted probability that use4=Sterilization as a function of age and urban, but # holding the number of living children constant at 2 and holding district # unobservables constant at zero n1 <- exp(b["FP_Intercept_Sterilization"] + b["FP_age_Sterilization"]*bang$age + b["FP_lcTwo_children_Sterilization"] + b["FP_urbanUrban_Sterilization"]*as.integer(bang$urban=="Urban")) # Generate the numerator of the predicted probability expression for the # predicted probability that use4=Modern_reversible_method as a function of age and urban, but # holding the number of living children constant at 2 and holding district # unobservables constant at zero n2 <- exp(b["FP_Intercept_Modern_reversible_method"] + b["FP_age_Modern_reversible_method"]*bang$age + b["FP_lcTwo_children_Modern_reversible_method"] + b["FP_urbanUrban_Modern_reversible_method"]*as.integer(bang$urban=="Urban")) # Generate the numerator of the predicted probability expression for the # predicted probability that use4=Traditional_method as a function of age and urban, but # holding the number of living children constant at 2 and holding district # unobservables constant at zero n3 <- exp(b["FP_Intercept_Traditional_method"] + b["FP_age_Traditional_method"]*bang$age + b["FP_lcTwo_children_Traditional_method"] + b["FP_urbanUrban_Traditional_method"]*as.integer(bang$urban=="Urban")) # Generate the denominator of the predicted probability expression d <- (1 + n1 + n2 + n3) # Generate the predicted probability that use4=1 p1 <- n1 / d # Generate the predicted probability that use4=2 p2 <- n3 / d # Generate the predicted probability that use4=3 p3 <- n2 / d # Generate the predicted probability that use4=4 p4 <- 1 - p1 - p2 - p3 # Generate a new variable actualage which stores the actual age of each # woman actualage <- bang$age + 30 plotdata <- data.frame(p1=p1,p2=p2,p3=p3,p4=p4,age=actualage,urban=bang$urban) plotdata <- plotdata[with(plotdata, order(age)), ] if (!require(lattice)) install.packages("lattice"); library(lattice) xyplot(p4+p3+p2+p1~age|urban, data=plotdata, ylab="Probability", xlab="Woman's age (years)", type="l", ylim=c(0,1), auto.key=list(points=FALSE, lines=TRUE, text=c("No contraception", "Traditional method", "Modern method", "Modern method"))) # Drop the variables plotdata, n1, n2, n3, d, p1, p2, p3 and p4 rm(plotdata,n1,n2,n3,d,p1,p2,p3,p4) # Generate the numerator of the predicted probability expression for the # predicted probability that use4=Sterilization as a function of number of living # children, urban and district, but holding age constant at 30 n1 <- exp(b["FP_Intercept_Sterilization"] + b["FP_lcOne_child_Sterilization"]*as.integer(bang$lc == "One_child") + b["FP_lcTwo_children_Sterilization"]*as.integer(bang$lc == "Two_children") + b["FP_lcThree_plus_Sterilization"]*as.integer(bang$lc == "Three_plus") + b["FP_urbanUrban_Sterilization"]*as.integer(bang$urban=="Urban") + model17@residual$lev_2_resi_est_Intercept.Sterilization[bang$district]) # Generate the numerator of the predicted probability expression for the # predicted probability that use4=Modern_reversible_method as a function of number of living # children, urban and district, but holding age constant at 30 n2 <- exp(b["FP_Intercept_Modern_reversible_method"] + b["FP_lcOne_child_Modern_reversible_method"]*as.integer(bang$lc == "One_child") + b["FP_lcTwo_children_Modern_reversible_method"]*as.integer(bang$lc == "Two_children") + b["FP_lcThree_plus_Modern_reversible_method"]*as.integer(bang$lc == "Three_plus") + b["FP_urbanUrban_Modern_reversible_method"]*as.integer(bang$urban=="Urban") + model17@residual$lev_2_resi_est_Intercept.Modern_reversible_method[bang$district]) # Generate the numerator of the predicted probability expression for the # predicted probability that use4=Traditional_method as a function of number of living # children, urban and district, but holding age constant at 30 n3 <- exp(b["FP_Intercept_Traditional_method"] + b["FP_lcOne_child_Traditional_method"]*as.integer(bang$lc == "One_child") + b["FP_lcTwo_children_Traditional_method"]*as.integer(bang$lc == "Two_children") + b["FP_lcThree_plus_Traditional_method"]*as.integer(bang$lc == "Three_plus") + b["FP_urbanUrban_Traditional_method"]*as.integer(bang$urban=="Urban") + model17@residual$lev_2_resi_est_Intercept.Traditional_method[bang$district]) # Generate the denominator of the predicted probability expression d <- (1 + n1 + n2 + n3) # Generate the predicted probability that use4=1 p1 <- n1 / d # Generate the predicted probability that use4=2 p2 <- n3 / d # Generate the predicted probability that use4=3 p3 <- n2 / d # Generate the predicted probability that use4=4 p4 <- 1 - p1 - p2 - p3 # Generate a new variable lc_p4 equal to lc minus 0.15 lc_p4 <- as.integer(bang$lc) - 0.15 # Generate a new variable lc_p4 equal to lc minus 0.075 lc_p3 <- as.integer(bang$lc) - 0.075 # Generate a new variable lc_p4 equal to lc plus 0.075 lc_p2 <- as.integer(bang$lc) + 0.075 # Generate a new variable lc_p4 equal to lc plus 0.15 lc_p1 <- as.integer(bang$lc) + 0.15 plotdata <- data.frame(p1=p1,p2=p2,p3=p3,p4=p4,lc_p1=lc_p1,lc_p2=lc_p2,lc_p3=lc_p3,lc_p4=lc_p4,urban=bang$urban) xyplot(c(p4,p3,p2,p1)~lc_p4+lc_p3+lc_p2+lc_p1|urban, outer=FALSE, data=plotdata, ylab="Probability", xlab="Number of living children", type="p", ylim=c(0,1), scales=list(x=list(labels=c(NA,"0","1","2","3+"))), auto.key=list(points=TRUE, lines=FALSE, text=c("No contraception", "Traditional method", "Modern method", "Modern method"))) # Drop the variables plotdata, n1, n2, n3, lc_p1, lc_p2, lc_p3 and lc_p4 rm(plotdata,n1,n2,n3,lc_p1,lc_p2,lc_p3,lc_p4) ############################################################################