## R script to run examples from: Zhang Z, Parker RMA, Charlton CMJ, Leckie G, ## Browne WJ. R2MLwiN: A Package to Run MLwiN from within R, Journal of ## Statistical Software. ## NB the predLines function uses a lot of contiguous memory, and so we recommend ## running via 64-bit version R to mititate against any potential problems. ## 1: Introduction ############################################################# #### 1.3: The R2MLwiN package library("R2MLwiN") ## Returns current path to MLwiN - please change if your path differs (see below): options("MLwiN_path") ## To change current path: options(MLwiN_path = 'path/to/MLwiN vX.XX/') demo(package = "R2MLwiN") ## To run a demo: demo("MCMCGuide03") ## 2: Fitting a 2-level continuous response model using IGLS ################### data("tutorial") F1 <- normexam ~ 1 + (1 | school) + (1 | student) (VarCompModel <- runMLwiN(Formula = F1, data = tutorial)) #### 2.1 Conducting a likelihood ratio test F2 <- normexam ~ 1 + (1 | student) OneLevelModel <- runMLwiN(Formula = F2, data = tutorial) if (!require(lmtest)) install.packages("lmtest") lrtest(OneLevelModel, VarCompModel) #### 2.2: Calculating the variance partition coefficient (VPC) slotNames(VarCompModel) print(VPC <- VarCompModel["RP"][["RP2_var_Intercept"]]/(VarCompModel["RP"][["RP1_var_Intercept"]] + VarCompModel["RP"][["RP2_var_Intercept"]])) rm(VarCompModel) rm(OneLevelModel) ## 3: Storing residuals ######################################################## VarCompResid <- runMLwiN(F1, data = tutorial, estoptions = list(resi.store = TRUE)) residuals <- VarCompResid@residual$lev_2_resi_est_Intercept residualsCI <- 1.96 * sqrt(VarCompResid@residual$lev_2_resi_var_Intercept) residualsRank <- rank(residuals) rankno <- order(residualsRank) caterpillar(y = residuals[rankno], x = 1:65, qtlow = (residuals - residualsCI)[rankno], qtup = (residuals + residualsCI)[rankno], xlab = "Rank", ylab = "Intercept") rm(VarCompResid) # Illustrating debugmode (VarCompResidDebug <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(debugmode = TRUE))) rm(VarCompResidDebug) # Illustrating show.file (VarCompResidShowFile <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(show.file = TRUE, clean.files = FALSE))) rm(VarCompResidShowFile) ## 5: Fitting a 2-level continuous response model using MCMC ################### (VarCompMCMC <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(EstM = 1))) # Example changing away from defaults ChangeEstOptions <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(EstM = 1, mcmcMeth = list(burnin = 1000, nchains = 3, iterations = 10000, thinning = 10, seed = 1:3))) trajectories(VarCompMCMC) #### 5.1: Calculating the variance partition coefficient VPC_MCMC <- VarCompMCMC["chains"][, "RP2_var_Intercept"]/(VarCompMCMC["chains"][, "RP1_var_Intercept"] + VarCompMCMC["chains"][, "RP2_var_Intercept"]) sixway(VPC_MCMC, name = "VPC") ## 6: Adding a predictor to the fixed part of a model ########################## F3 <- normexam ~ 1 + standlrt + (1 | school) + (1 | student) (standlrtMCMC <- runMLwiN(Formula = F3, data = tutorial, estoptions = list(EstM = 1))) print(standlrtMCMC, z.ratio = FALSE) VarCompMCMC["BDIC"][["DIC"]] - standlrtMCMC["BDIC"][["DIC"]] rm(VarCompMCMC) ## 7: Priors, starting values, and random seeds ################################ informpriorMCMC <- runMLwiN(Formula = F3, data = tutorial, estoptions = list(EstM = 1, mcmcMeth = list(priorParam = list(fixe = list(standlrt = c(1, 0.01)))))) PlotDensities <- data.frame(PostAndPrior = c(informpriorMCMC["chains"][, "FP_standlrt"], qnorm(seq(1/5000, 1, by = 1/5000), mean = 1, sd = 0.01)), id = c(rep("Posterior", 5000), rep("Prior", 5000))) if (!require(lattice)) install.packages("lattice") densityplot(~PostAndPrior, data = PlotDensities, groups = id, ref = TRUE, plot.points = FALSE, auto.key = list(space = "bottom"), xlab = NULL) rm(informpriorMCMC) # Specifying own starting values (instead of IGLS estimates from MLwiN) OurStartingValues <- list(FP.b = c(-2, 5), RP.b = c(2, 4)) startvalMCMC <- runMLwiN(F3, data = tutorial, estoptions = list(EstM = 1, startval = OurStartingValues, mcmcMeth = list(burnin = 0, iterations = 500))) trajectories(startvalMCMC["chains"], Range = c(1, 500)) rm(startvalMCMC) # Illustrating a change of random seed (seedMCMC <- runMLwiN(Formula = F3, data = tutorial, estoptions = list(EstM = 1, mcmcMeth = list(seed = 2)))) rm(seedMCMC) ## 8: Adding a random slope / coefficient ###################################### F4 <- normexam ~ 1 + standlrt + (1 + standlrt | school) + (1 | student) (standlrtRS_MCMC <- runMLwiN(Formula = F4, data = tutorial, estoptions = list(EstM = 1, resi.store.levs = 2))) ComparingModels <- rbind(standlrtMCMC["BDIC"], standlrtRS_MCMC["BDIC"]) rownames(ComparingModels) <- c("Random intercept", "Random slope") ComparingModels predLines(standlrtRS_MCMC, xname = "standlrt", lev = 2, probs = c(0.025, 0.975), legend = FALSE) predLines(standlrtRS_MCMC, xname = "standlrt", lev = 2, selected = c(30, 44, 53, 59), probs = c(0.025, 0.975), legend = TRUE) rm(standlrtMCMC) rm(standlrtRS_MCMC) ## 9: Modeling complex level 1 variance ######################################## F5 <- normexam ~ 1 + standlrt + (1 + standlrt | school) + (1 + standlrt | student) standlrtC1V_MCMC <- runMLwiN(Formula = F5, data = tutorial, estoptions = (list(EstM = 1, mcmcMeth = list(lclo = 1)))) l2varfn <- standlrtC1V_MCMC["RP"][["RP2_var_Intercept"]] + 2 * standlrtC1V_MCMC["RP"][["RP2_cov_Intercept_standlrt"]] * tutorial[["standlrt"]] + standlrtC1V_MCMC["RP"][["RP2_var_standlrt"]] * tutorial[["standlrt"]]^2 l1varfn <- 1/exp(standlrtC1V_MCMC["RP"][["RP1_var_Intercept"]] + 2 * standlrtC1V_MCMC["RP"][["RP1_cov_Intercept_standlrt"]] * tutorial[["standlrt"]] + standlrtC1V_MCMC["RP"][["RP1_var_standlrt"]] * tutorial[["standlrt"]]^2) plot(sort(tutorial[["standlrt"]]), l2varfn[order(tutorial[["standlrt"]])], xlab = "standlrt", ylab = "variance", ylim = c(0, 0.7), type = "l") lines(sort(tutorial[["standlrt"]]), l1varfn[order(tutorial[["standlrt"]])], lty = "longdash") abline(v = 0, lty = "dotted") legend("bottomright", legend = c("between-student", "between-school"), lty = c("longdash", "solid")) trajectories(standlrtC1V_MCMC["chains"][, "FP_Intercept", drop = FALSE], Range = c(4501, 5000)) trajectories(standlrtC1V_MCMC["chains"][, "RP2_var_Intercept", drop = FALSE], Range = c(4501, 5000)) trajectories(standlrtC1V_MCMC["chains"][, "RP1_var_Intercept", drop = FALSE], Range = c(4501, 5000)) rm(standlrtC1V_MCMC) ## 10: Fitting a 2-level binary response model using MCMC ###################### data("bang1") F6 <- logit(use) ~ 1 + age + lc + urban + (1 + urban | district) binomialMCMC <- runMLwiN(Formula = F6, D = "Binomial", data = bang1, estoptions = list(EstM = 1)) trajectories(binomialMCMC["chains"][, "FP_Intercept", drop = FALSE]) ## 11: Alternative MCMC methods implemented in MLwiN ########################### #### 11.1: An example using orthogonal parameterisation to improve mixing (OrthogbinomialMCMC <- runMLwiN(Formula = F6, D = "Binomial", data = bang1, estoptions = list(EstM = 1, mcmcOptions = list(orth = 1)))) trajectories(OrthogbinomialMCMC["chains"][, "FP_Intercept", drop = FALSE]) ## 12: Using R2MLwiN to write BUGS code ######################################## # If not running via Windows (or Wine), and WinBUGS not available, try OpenBUGS # Please change line below if path different openbugs <- "C:/Program Files (x86)/OpenBUGS/OpenBUGS323/OpenBUGS.exe" BUGSmodel <- runMLwiN(Formula = F6, D = "Binomial", data = bang1, estoptions = list(EstM = 1, mcmcMeth = list(burnin = 500, iterations = 5000, thinning = 1)), BUGO = c(n.chains = 1, debug = FALSE, seed = 1, bugs = openbugs, OpenBugs = TRUE)) summary(BUGSmodel) cc <- cbind(binomialMCMC["FP"], OrthogbinomialMCMC["FP"]) dd <- cbind(head(binomialMCMC["RP"], -1), head(OrthogbinomialMCMC["RP"], -1)) ESS.binMCMC <- effectiveSize(binomialMCMC["chains"][, 2:10]) ESS.orthogMCMC <- effectiveSize(OrthogbinomialMCMC["chains"][, 2:10]) ESStable <- round(rbind(cc, dd), 3) BUGSlist <- c(1:6, 8, 9, 11) BUGS.Coeff <- round(summary(BUGSmodel)$statistics[BUGSlist, 1], 3) BUGS.ESS <- as.data.frame(effectiveSize(BUGSmodel)) ESStable <- cbind(ESStable[, 1], round(ESS.binMCMC), ESStable[, 2], round(ESS.orthogMCMC), BUGS.Coeff, round(BUGS.ESS[BUGSlist, ])) colnames(ESStable) <- c("A)Coeff.", "A)ESS", "B)Coeff.", "B)ESS", "C)Coeff.", "C)ESS") cat("NB: A = MLwiN(non-orthog.), B = MLwiN(orthog.), C = WinBUGS\n");ESStable rm(binomialMCMC) rm(OrthogbinomialMCMC) rm(BUGSmodel) ## 13: Modeling a cross-classified data structure ############################## data("xc") F7 <- attain ~ 1 + (1 | sid) + (1 | pid) + (1 | pupil) (XCModel <- runMLwiN(Formula = F7, data = xc, estoptions = list(xc = TRUE, EstM = 1))) PercentExplainedBy <- cbind(XCModel["RP"][["RP3_var_Intercept"]]/(XCModel["RP"][["RP1_var_Intercept"]] + XCModel["RP"][["RP2_var_Intercept"]] + XCModel["RP"][["RP3_var_Intercept"]]) * 100, XCModel["RP"][["RP2_var_Intercept"]]/(XCModel["RP"][["RP1_var_Intercept"]] + XCModel["RP"][["RP2_var_Intercept"]] + XCModel["RP"][["RP3_var_Intercept"]]) * 100) colnames(PercentExplainedBy) <- c("sid", "pid") PercentExplainedBy rm(XCModel) ## 14: Modeling a multiple membership data structure ########################### data("wage1") F8 <- logearn ~ 1 + age_40 + numjobs + (1 | company) + (1 | id) OurMultiMemb <- list(list(mmvar = list("company", "company2", "company3", "company4"), weights = list("weight1", "weight2", "weight3", "weight4")), NA) (MMembModel <- runMLwiN(Formula = F8, data = wage1, estoptions = list(EstM = 1, drop.data = FALSE, mm = OurMultiMemb))) MMembModel["RP"][["RP2_var_Intercept"]]/(MMembModel["RP"][["RP2_var_Intercept"]] + MMembModel["RP"][["RP1_var_Intercept"]]) * 100 rm(MMembModel) ## 15: Multivariate response models ############################################ data("jspmix1") F9 <- c(english, probit(behaviour)) ~ 1 + sex + ravens + fluent[1] + (1 | school) + (1[1] | id) (MixedRespMCMC <- runMLwiN(Formula = F9, D = c("Mixed", "Normal", "Binomial"), data = jspmix1, estoptions = list(EstM = 1, mcmcMeth = list(fixM = 1, residM = 1, Lev1VarM = 2)))) CompareCorrelation <- cbind(MixedRespMCMC["RP"][["RP2_cov_Intercept_1_Intercept_2"]]/sqrt(MixedRespMCMC["RP"][["RP2_var_Intercept_1"]] * MixedRespMCMC["RP"][["RP2_var_Intercept_2"]]), MixedRespMCMC["RP"][["RP1_cov_bcons_2_Intercept_1"]]/sqrt(MixedRespMCMC["RP"][["RP1_var_Intercept_1"]] * MixedRespMCMC["RP"][["RP1_var_bcons_2"]])) colnames(CompareCorrelation) <- c("school-level", "pupil-level") CompareCorrelation ExplainedBySchool <- cbind(MixedRespMCMC["RP"][["RP2_var_Intercept_1"]]/(MixedRespMCMC["RP"][["RP1_var_Intercept_1"]] + MixedRespMCMC["RP"][["RP2_var_Intercept_1"]]) * 100, MixedRespMCMC["RP"][["RP2_var_Intercept_2"]]/(MixedRespMCMC["RP"][["RP1_var_bcons_2"]] + MixedRespMCMC["RP"][["RP2_var_Intercept_2"]]) * 100) colnames(ExplainedBySchool) <- c("english", "behaviour") ExplainedBySchool rm(MixedRespMCMC) ## 16: Running simulations / fitting multiple models ########################### set.seed(1) pupil <- 1:108 school <- rep(1:6, each = 18) F10 <- resp ~ 1 + (1 | school) + (1 | pupil) ns <- 100 IGLS_array <- MCMC_array <- array(, c(3, 2, ns)) MCMC_median <- data.frame(RP2_var_Intercept = rep(0, ns), RP1_var_Intercept = rep(0, ns)) CounterMCMC <- rep(0, 3) Actual <- c(30, 10, 40) for (i in 1:ns) { u_short <- rnorm(6, 0, sqrt(Actual[2])) u <- rep(u_short, each = 18, len = 108) e <- rnorm(108, 0, sqrt(Actual[3])) resp <- Actual[1] * 1 + u + e simData <- data.frame(cbind(pupil, school, resp)) simModelIGLS <- runMLwiN(F10, data = simData) IGLS_array[, , i] <- cbind(coef(simModelIGLS), diag(vcov(simModelIGLS))) simModelMCMC <- runMLwiN(F10, estoptions = list(EstM = 1), data = simData) MCMC_array[, , i] <- cbind(coef(simModelMCMC), diag(vcov(simModelMCMC))) MCMC_median[i, ] <- c(median(simModelMCMC["chains"][, "RP2_var_Intercept"]), median(simModelMCMC["chains"][, "RP1_var_Intercept"])) if (Actual[1] > quantile(simModelMCMC["chains"][, "FP_Intercept"], 0.025) & Actual[1] < quantile(simModelMCMC["chains"][, "FP_Intercept"], 0.975)) { CounterMCMC[1] <- CounterMCMC[1] + 1 } if (Actual[2] > quantile(simModelMCMC["chains"][, "RP2_var_Intercept"], 0.025) & Actual[2] < quantile(simModelMCMC["chains"][, "RP2_var_Intercept"], 0.975)) { CounterMCMC[2] <- CounterMCMC[2] + 1 } if (Actual[3] > quantile(simModelMCMC["chains"][, "RP1_var_Intercept"], 0.025) & Actual[3] < quantile(simModelMCMC["chains"][, "RP1_var_Intercept"], 0.975)) { CounterMCMC[3] <- CounterMCMC[3] + 1 } } aa <- sapply(1:ns, function(x) na.omit(stack(as.data.frame(IGLS_array[, , x])))$values) counterIGLS <- rep(0, 3) for (i in 1:ns) { if (Actual[1] > aa[1, i] - 1.96 * sqrt(aa[4, i]) & Actual[1] < aa[1, i] + 1.96 * sqrt(aa[4, i])) { counterIGLS[1] <- counterIGLS[1] + 1 } if (Actual[2] > aa[2, i] - 1.96 * sqrt(aa[5, i]) & Actual[2] < aa[2, i] + 1.96 * sqrt(aa[5, i])) { counterIGLS[2] <- counterIGLS[2] + 1 } if (Actual[3] > aa[3, i] - 1.96 * sqrt(aa[6, i]) & Actual[3] < aa[3, i] + 1.96 * sqrt(aa[6, i])) { counterIGLS[3] <- counterIGLS[3] + 1 } } Percent_interval_coverage <- (counterIGLS/ns) * 100 Mean_across_simus <- round(c(mean(aa[1, ]), mean(aa[2, ]), mean(aa[3, ])), 2) Percent_bias <- round(-100 * (1 - Mean_across_simus/Actual), 2) IGLS_results <- cbind(Mean_across_simus, Actual, Percent_bias, Percent_interval_coverage) rownames(IGLS_results) <- c("beta0", "sigma2_u", "sigma2_e") Percent_interval_coverage <- (CounterMCMC/ns) * 100 bb <- sapply(1:ns, function(x) na.omit(stack(as.data.frame(MCMC_array[, , x])))$values) Mean_across_simus <- round(c(mean(bb[1, ]), mean(bb[2, ]), mean(bb[3, ])), 2) Percent_bias <- round(-100 * (1 - Mean_across_simus/Actual), 2) MCMC_results <- cbind(Mean_across_simus, Actual, Percent_bias, Percent_interval_coverage) rownames(MCMC_results) <- c("beta0", "sigma2_u", "sigma2_e") cat("Simulation results using IGLS\n");IGLS_results cat("Simulation results using MCMC\n");MCMC_results Mean_across_simus <- round(c(mean(MCMC_median$RP2_var_Intercept), mean(MCMC_median$RP1_var_Intercept)), 2) Actual <- tail(Actual, -1) Percent_bias <- round(-100 * (1 - Mean_across_simus/Actual), 2) Percent_interval_coverage <- tail(Percent_interval_coverage, -1) MCMC_results2 <- cbind(Mean_across_simus, Actual, Percent_bias, Percent_interval_coverage) rownames(MCMC_results2) <- c("sigma2_u", "sigma2_e") cat("Simulation results based on median MCMC estimates\n");MCMC_results2