## R script to run examples from: Zhang Z, Parker RMA, Charlton CMJ, Leckie G,
## Browne WJ. R2MLwiN: A Package to Run MLwiN from within R, Journal of
## Statistical Software.

## NB the predLines function uses a lot of contiguous memory, and so we recommend
## running via 64-bit version R to mititate against any potential problems.

## 1: Introduction #############################################################

#### 1.3: The R2MLwiN package

library("R2MLwiN")

## Returns current path to MLwiN - please change if your path differs (see below):
options("MLwiN_path")

## To change current path: options(MLwiN_path = 'path/to/MLwiN vX.XX/')

demo(package = "R2MLwiN")

## To run a demo:
demo("MCMCGuide03")


## 2: Fitting a 2-level continuous response model using IGLS ###################

data("tutorial")

F1 <- normexam ~ 1 + (1 | school) + (1 | student)

(VarCompModel <- runMLwiN(Formula = F1, data = tutorial))

#### 2.1 Conducting a likelihood ratio test

F2 <- normexam ~ 1 + (1 | student)

OneLevelModel <- runMLwiN(Formula = F2, data = tutorial)

if (!require(lmtest)) install.packages("lmtest")

lrtest(OneLevelModel, VarCompModel)

#### 2.2: Calculating the variance partition coefficient (VPC)

slotNames(VarCompModel)

print(VPC <- VarCompModel["RP"][["RP2_var_Intercept"]]/(VarCompModel["RP"][["RP1_var_Intercept"]] + 
  VarCompModel["RP"][["RP2_var_Intercept"]]))

rm(VarCompModel)
rm(OneLevelModel)


## 3: Storing residuals ########################################################

VarCompResid <- runMLwiN(F1, data = tutorial, estoptions = list(resi.store = TRUE))

residuals <- VarCompResid@residual$lev_2_resi_est_Intercept
residualsCI <- 1.96 * sqrt(VarCompResid@residual$lev_2_resi_var_Intercept)
residualsRank <- rank(residuals)
rankno <- order(residualsRank)

caterpillar(y = residuals[rankno], x = 1:65, qtlow = (residuals - residualsCI)[rankno], 
  qtup = (residuals + residualsCI)[rankno], xlab = "Rank", ylab = "Intercept")
rm(VarCompResid)

# Illustrating debugmode
(VarCompResidDebug <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(debugmode = TRUE)))

rm(VarCompResidDebug)

# Illustrating show.file
(VarCompResidShowFile <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(show.file = TRUE, 
  clean.files = FALSE)))

rm(VarCompResidShowFile)


## 5: Fitting a 2-level continuous response model using MCMC ###################

(VarCompMCMC <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(EstM = 1)))

# Example changing away from defaults
ChangeEstOptions <- runMLwiN(Formula = F1, data = tutorial, estoptions = list(EstM = 1, 
  mcmcMeth = list(burnin = 1000, nchains = 3, iterations = 10000, thinning = 10, 
    seed = 1:3)))

trajectories(VarCompMCMC)

#### 5.1: Calculating the variance partition coefficient

VPC_MCMC <- VarCompMCMC["chains"][, "RP2_var_Intercept"]/(VarCompMCMC["chains"][, 
  "RP1_var_Intercept"] + VarCompMCMC["chains"][, "RP2_var_Intercept"])
sixway(VPC_MCMC, name = "VPC")


## 6: Adding a predictor to the fixed part of a model ##########################

F3 <- normexam ~ 1 + standlrt + (1 | school) + (1 | student)
(standlrtMCMC <- runMLwiN(Formula = F3, data = tutorial, estoptions = list(EstM = 1)))

print(standlrtMCMC, z.ratio = FALSE)

VarCompMCMC["BDIC"][["DIC"]] - standlrtMCMC["BDIC"][["DIC"]]

rm(VarCompMCMC)


## 7: Priors, starting values, and random seeds ################################

informpriorMCMC <- runMLwiN(Formula = F3, data = tutorial, estoptions = list(EstM = 1, 
  mcmcMeth = list(priorParam = list(fixe = list(standlrt = c(1, 0.01))))))

PlotDensities <- data.frame(PostAndPrior = c(informpriorMCMC["chains"][, "FP_standlrt"], 
  qnorm(seq(1/5000, 1, by = 1/5000), mean = 1, sd = 0.01)), id = c(rep("Posterior", 
  5000), rep("Prior", 5000)))

if (!require(lattice)) install.packages("lattice")

densityplot(~PostAndPrior, data = PlotDensities, groups = id, ref = TRUE, plot.points = FALSE, 
  auto.key = list(space = "bottom"), xlab = NULL)

rm(informpriorMCMC)

# Specifying own starting values (instead of IGLS estimates from MLwiN)
OurStartingValues <- list(FP.b = c(-2, 5), RP.b = c(2, 4))
startvalMCMC <- runMLwiN(F3, data = tutorial, estoptions = list(EstM = 1, startval = OurStartingValues, 
  mcmcMeth = list(burnin = 0, iterations = 500)))

trajectories(startvalMCMC["chains"], Range = c(1, 500))

rm(startvalMCMC)

# Illustrating a change of random seed
(seedMCMC <- runMLwiN(Formula = F3, data = tutorial, estoptions = list(EstM = 1, 
  mcmcMeth = list(seed = 2))))

rm(seedMCMC)


## 8: Adding a random slope / coefficient ######################################

F4 <- normexam ~ 1 + standlrt + (1 + standlrt | school) + (1 | student)
(standlrtRS_MCMC <- runMLwiN(Formula = F4, data = tutorial, estoptions = list(EstM = 1, 
  resi.store.levs = 2)))

ComparingModels <- rbind(standlrtMCMC["BDIC"], standlrtRS_MCMC["BDIC"])
rownames(ComparingModels) <- c("Random intercept", "Random slope")
ComparingModels

predLines(standlrtRS_MCMC, xname = "standlrt", lev = 2, probs = c(0.025, 0.975), 
  legend = FALSE)

predLines(standlrtRS_MCMC, xname = "standlrt", lev = 2, selected = c(30, 44, 53, 
  59), probs = c(0.025, 0.975), legend = TRUE)

rm(standlrtMCMC)
rm(standlrtRS_MCMC)


## 9: Modeling complex level 1 variance ########################################

F5 <- normexam ~ 1 + standlrt + (1 + standlrt | school) + (1 + standlrt | student)
standlrtC1V_MCMC <- runMLwiN(Formula = F5, data = tutorial, estoptions = (list(EstM = 1, 
  mcmcMeth = list(lclo = 1))))

l2varfn <- standlrtC1V_MCMC["RP"][["RP2_var_Intercept"]] + 2 * standlrtC1V_MCMC["RP"][["RP2_cov_Intercept_standlrt"]] * 
  tutorial[["standlrt"]] + standlrtC1V_MCMC["RP"][["RP2_var_standlrt"]] * tutorial[["standlrt"]]^2
l1varfn <- 1/exp(standlrtC1V_MCMC["RP"][["RP1_var_Intercept"]] + 2 * standlrtC1V_MCMC["RP"][["RP1_cov_Intercept_standlrt"]] * 
  tutorial[["standlrt"]] + standlrtC1V_MCMC["RP"][["RP1_var_standlrt"]] * tutorial[["standlrt"]]^2)

plot(sort(tutorial[["standlrt"]]), l2varfn[order(tutorial[["standlrt"]])], xlab = "standlrt", 
  ylab = "variance", ylim = c(0, 0.7), type = "l")

lines(sort(tutorial[["standlrt"]]), l1varfn[order(tutorial[["standlrt"]])], lty = "longdash")
abline(v = 0, lty = "dotted")
legend("bottomright", legend = c("between-student", "between-school"), lty = c("longdash", 
  "solid"))

trajectories(standlrtC1V_MCMC["chains"][, "FP_Intercept", drop = FALSE], Range = c(4501, 
  5000))

trajectories(standlrtC1V_MCMC["chains"][, "RP2_var_Intercept", drop = FALSE], Range = c(4501, 
  5000))

trajectories(standlrtC1V_MCMC["chains"][, "RP1_var_Intercept", drop = FALSE], Range = c(4501, 
  5000))

rm(standlrtC1V_MCMC)


## 10: Fitting a 2-level binary response model using MCMC ######################

data("bang1")

F6 <- logit(use) ~ 1 + age + lc + urban + (1 + urban | district)

binomialMCMC <- runMLwiN(Formula = F6, D = "Binomial", data = bang1, estoptions = list(EstM = 1))

trajectories(binomialMCMC["chains"][, "FP_Intercept", drop = FALSE])


## 11: Alternative MCMC methods implemented in MLwiN ###########################

#### 11.1: An example using orthogonal parameterisation to improve mixing

(OrthogbinomialMCMC <- runMLwiN(Formula = F6, D = "Binomial", data = bang1, estoptions = list(EstM = 1, 
  mcmcOptions = list(orth = 1))))

trajectories(OrthogbinomialMCMC["chains"][, "FP_Intercept", drop = FALSE])


## 12: Using R2MLwiN to write BUGS code ########################################

# If not running via Windows (or Wine), and WinBUGS not available, try OpenBUGS
# Please change line below if path different
openbugs <- "C:/Program Files (x86)/OpenBUGS/OpenBUGS323/OpenBUGS.exe"
BUGSmodel <- runMLwiN(Formula = F6, D = "Binomial", data = bang1, estoptions = list(EstM = 1, 
  mcmcMeth = list(burnin = 500, iterations = 5000, thinning = 1)), BUGO = c(n.chains = 1, 
  debug = FALSE, seed = 1, bugs = openbugs, OpenBugs = TRUE))
summary(BUGSmodel)
cc <- cbind(binomialMCMC["FP"], OrthogbinomialMCMC["FP"])
dd <- cbind(head(binomialMCMC["RP"], -1), head(OrthogbinomialMCMC["RP"], -1))
ESS.binMCMC <- effectiveSize(binomialMCMC["chains"][, 2:10])
ESS.orthogMCMC <- effectiveSize(OrthogbinomialMCMC["chains"][, 2:10])
ESStable <- round(rbind(cc, dd), 3)
BUGSlist <- c(1:6, 8, 9, 11)
BUGS.Coeff <- round(summary(BUGSmodel)$statistics[BUGSlist, 1], 3)
BUGS.ESS <- as.data.frame(effectiveSize(BUGSmodel))
ESStable <- cbind(ESStable[, 1], round(ESS.binMCMC), ESStable[, 2], round(ESS.orthogMCMC), 
  BUGS.Coeff, round(BUGS.ESS[BUGSlist, ]))
colnames(ESStable) <- c("A)Coeff.", "A)ESS", "B)Coeff.", "B)ESS", "C)Coeff.", "C)ESS")
cat("NB: A = MLwiN(non-orthog.), B = MLwiN(orthog.), C = WinBUGS\n");ESStable

rm(binomialMCMC)
rm(OrthogbinomialMCMC)
rm(BUGSmodel)


## 13: Modeling a cross-classified data structure ##############################

data("xc")

F7 <- attain ~ 1 + (1 | sid) + (1 | pid) + (1 | pupil)
(XCModel <- runMLwiN(Formula = F7, data = xc, estoptions = list(xc = TRUE, EstM = 1)))

PercentExplainedBy <- cbind(XCModel["RP"][["RP3_var_Intercept"]]/(XCModel["RP"][["RP1_var_Intercept"]] + 
  XCModel["RP"][["RP2_var_Intercept"]] + XCModel["RP"][["RP3_var_Intercept"]]) * 
  100, XCModel["RP"][["RP2_var_Intercept"]]/(XCModel["RP"][["RP1_var_Intercept"]] + 
  XCModel["RP"][["RP2_var_Intercept"]] + XCModel["RP"][["RP3_var_Intercept"]]) * 
  100)
colnames(PercentExplainedBy) <- c("sid", "pid")
PercentExplainedBy

rm(XCModel)


## 14: Modeling a multiple membership data structure ###########################

data("wage1")

F8 <- logearn ~ 1 + age_40 + numjobs + (1 | company) + (1 | id)
OurMultiMemb <- list(list(mmvar = list("company", "company2", "company3", "company4"), 
  weights = list("weight1", "weight2", "weight3", "weight4")), NA)
(MMembModel <- runMLwiN(Formula = F8, data = wage1, estoptions = list(EstM = 1, drop.data = FALSE, 
  mm = OurMultiMemb)))

MMembModel["RP"][["RP2_var_Intercept"]]/(MMembModel["RP"][["RP2_var_Intercept"]] + 
  MMembModel["RP"][["RP1_var_Intercept"]]) * 100

rm(MMembModel)


## 15: Multivariate response models ############################################

data("jspmix1")

F9 <- c(english, probit(behaviour)) ~ 1 + sex + ravens + fluent[1] + (1 | school) + 
  (1[1] | id)

(MixedRespMCMC <- runMLwiN(Formula = F9, D = c("Mixed", "Normal", "Binomial"), data = jspmix1, 
  estoptions = list(EstM = 1, mcmcMeth = list(fixM = 1, residM = 1, Lev1VarM = 2))))

CompareCorrelation <- cbind(MixedRespMCMC["RP"][["RP2_cov_Intercept_1_Intercept_2"]]/sqrt(MixedRespMCMC["RP"][["RP2_var_Intercept_1"]] * 
  MixedRespMCMC["RP"][["RP2_var_Intercept_2"]]), MixedRespMCMC["RP"][["RP1_cov_bcons_2_Intercept_1"]]/sqrt(MixedRespMCMC["RP"][["RP1_var_Intercept_1"]] * 
  MixedRespMCMC["RP"][["RP1_var_bcons_2"]]))
colnames(CompareCorrelation) <- c("school-level", "pupil-level")
CompareCorrelation

ExplainedBySchool <- cbind(MixedRespMCMC["RP"][["RP2_var_Intercept_1"]]/(MixedRespMCMC["RP"][["RP1_var_Intercept_1"]] + 
  MixedRespMCMC["RP"][["RP2_var_Intercept_1"]]) * 100, MixedRespMCMC["RP"][["RP2_var_Intercept_2"]]/(MixedRespMCMC["RP"][["RP1_var_bcons_2"]] + 
  MixedRespMCMC["RP"][["RP2_var_Intercept_2"]]) * 100)
colnames(ExplainedBySchool) <- c("english", "behaviour")
ExplainedBySchool

rm(MixedRespMCMC)


## 16: Running simulations / fitting multiple models ###########################

set.seed(1)
pupil <- 1:108
school <- rep(1:6, each = 18)
F10 <- resp ~ 1 + (1 | school) + (1 | pupil)

ns <- 100
IGLS_array <- MCMC_array <- array(, c(3, 2, ns))
MCMC_median <- data.frame(RP2_var_Intercept = rep(0, ns), RP1_var_Intercept = rep(0, 
  ns))
CounterMCMC <- rep(0, 3)
Actual <- c(30, 10, 40)
for (i in 1:ns) {
  u_short <- rnorm(6, 0, sqrt(Actual[2]))
  u <- rep(u_short, each = 18, len = 108)
  e <- rnorm(108, 0, sqrt(Actual[3]))
  resp <- Actual[1] * 1 + u + e
  simData <- data.frame(cbind(pupil, school, resp))
  simModelIGLS <- runMLwiN(F10, data = simData)
  IGLS_array[, , i] <- cbind(coef(simModelIGLS), diag(vcov(simModelIGLS)))
  simModelMCMC <- runMLwiN(F10, estoptions = list(EstM = 1), data = simData)
  MCMC_array[, , i] <- cbind(coef(simModelMCMC), diag(vcov(simModelMCMC)))
  MCMC_median[i, ] <- c(median(simModelMCMC["chains"][, "RP2_var_Intercept"]), 
    median(simModelMCMC["chains"][, "RP1_var_Intercept"]))
  if (Actual[1] > quantile(simModelMCMC["chains"][, "FP_Intercept"], 0.025) & Actual[1] < 
    quantile(simModelMCMC["chains"][, "FP_Intercept"], 0.975)) {
    CounterMCMC[1] <- CounterMCMC[1] + 1
  }
  if (Actual[2] > quantile(simModelMCMC["chains"][, "RP2_var_Intercept"], 0.025) & 
    Actual[2] < quantile(simModelMCMC["chains"][, "RP2_var_Intercept"], 0.975)) {
    CounterMCMC[2] <- CounterMCMC[2] + 1
  }
  if (Actual[3] > quantile(simModelMCMC["chains"][, "RP1_var_Intercept"], 0.025) & 
    Actual[3] < quantile(simModelMCMC["chains"][, "RP1_var_Intercept"], 0.975)) {
    CounterMCMC[3] <- CounterMCMC[3] + 1
  }
}

aa <- sapply(1:ns, function(x) na.omit(stack(as.data.frame(IGLS_array[, , x])))$values)
counterIGLS <- rep(0, 3)
for (i in 1:ns) {
  if (Actual[1] > aa[1, i] - 1.96 * sqrt(aa[4, i]) & Actual[1] < aa[1, i] + 1.96 * 
    sqrt(aa[4, i])) {
    counterIGLS[1] <- counterIGLS[1] + 1
  }
  if (Actual[2] > aa[2, i] - 1.96 * sqrt(aa[5, i]) & Actual[2] < aa[2, i] + 1.96 * 
    sqrt(aa[5, i])) {
    counterIGLS[2] <- counterIGLS[2] + 1
  }
  if (Actual[3] > aa[3, i] - 1.96 * sqrt(aa[6, i]) & Actual[3] < aa[3, i] + 1.96 * 
    sqrt(aa[6, i])) {
    counterIGLS[3] <- counterIGLS[3] + 1
  }
}
Percent_interval_coverage <- (counterIGLS/ns) * 100
Mean_across_simus <- round(c(mean(aa[1, ]), mean(aa[2, ]), mean(aa[3, ])), 2)
Percent_bias <- round(-100 * (1 - Mean_across_simus/Actual), 2)
IGLS_results <- cbind(Mean_across_simus, Actual, Percent_bias, Percent_interval_coverage)
rownames(IGLS_results) <- c("beta0", "sigma2_u", "sigma2_e")
Percent_interval_coverage <- (CounterMCMC/ns) * 100
bb <- sapply(1:ns, function(x) na.omit(stack(as.data.frame(MCMC_array[, , x])))$values)
Mean_across_simus <- round(c(mean(bb[1, ]), mean(bb[2, ]), mean(bb[3, ])), 2)
Percent_bias <- round(-100 * (1 - Mean_across_simus/Actual), 2)
MCMC_results <- cbind(Mean_across_simus, Actual, Percent_bias, Percent_interval_coverage)
rownames(MCMC_results) <- c("beta0", "sigma2_u", "sigma2_e")


cat("Simulation results using IGLS\n");IGLS_results
cat("Simulation results using MCMC\n");MCMC_results

Mean_across_simus <- round(c(mean(MCMC_median$RP2_var_Intercept), mean(MCMC_median$RP1_var_Intercept)), 
  2)
Actual <- tail(Actual, -1)
Percent_bias <- round(-100 * (1 - Mean_across_simus/Actual), 2)
Percent_interval_coverage <- tail(Percent_interval_coverage, -1)
MCMC_results2 <- cbind(Mean_across_simus, Actual, Percent_bias, Percent_interval_coverage)
rownames(MCMC_results2) <- c("sigma2_u", "sigma2_e")
cat("Simulation results based on median MCMC estimates\n");MCMC_results2