**************************************************************************** * RUNMLWIN - A PROGRAM TO RUN THE MLWIN MULTILEVEL MODELLING SOFTWARE FROM * WITHIN STATA * * George Leckie and Chris Charlton * Centre for Multilevel Modelling * University of Bristol * * http://www.bristol.ac.uk/cmm/software/runmlwin/ * **************************************************************************** **************************************************************************** * 1. INTRODUCTION **************************************************************************** **************************************************************************** * 1.3 The runmlwin command **************************************************************************** * Install the runmlwin command from the Statistical Software Components * (SSC) archive ssc install runmlwin * Specify the file address for mlwin.exe on your computer as the global * macro MLwiN_path global MLwiN_path "C:\Program Files\MLwiN v2.26\i386\mlwin.exe" **************************************************************************** * 2. CONTINUOUS RESPONSE MODELS **************************************************************************** * Load the tutorial data into memory use "http://www.bristol.ac.uk/cmm/media/runmlwin/tutorial.dta", clear **************************************************************************** * 2.1 Variance components model **************************************************************************** * Fit a two-level students-within-schools variance components model to * students' age 16 scores runmlwin normexam cons, level2(school: cons) level1(student: cons) * Display the school-level VPC or ICC statistic display [RP2]var(cons)/([RP2]var(cons) + [RP1]var(cons)) * Re-fit the previous model , but do not pause in MLwiN runmlwin normexam cons, level2(school: cons) level1(student: cons) nopause **************************************************************************** * 2.2 Random intercept model with predictor variables **************************************************************************** * Generate a boys' school dummy variable generate boysch = (schgend==2) * Generate a girls' school dummy variable generate girlsch = (schgend==3) * Fit a two-level random intercept model for student age 16 scores with * predictor variables and retrieve the Empirical Bayes estimates of the * school random effects runmlwin normexam cons standlrt girl boysch girlsch, /// level2(school: cons, residuals(u)) /// level1(student: cons) nopause nogroup * Test whether the boys' school effect is significantly different from the * girls' school effect test boysch = girlsch * Tag one observation per school egen pickone_school = tag(school) * Plot a quantile-quantile plot of the school residuals qnorm u0 if pickone_school==1, aspectratio(1) * Rank the school residuals egen u0rank = rank(u0) if pickone_school==1 * Plot a caterpillar plot of the school residuals serrbar u0 u0se u0rank if pickone_school==1, scale(1.96) yline(0) * Count how many schools' residuals are significantly below zero count if pickone_school==1 & u0 + 1.96*u0se < 0 * Count how many schools' residuals are significantly above zero count if pickone_school==1 & u0 - 1.96*u0se > 0 * Drop the variables u0 and u0se drop u0 u0se * Save the estimation results under the name model2 estimates store model2 **************************************************************************** * 2.3 Random slope model **************************************************************************** * Fit a two-level random slope model for student age 16 scores and retrieve * the empirical Bayes estimates of the school random effects runmlwin normexam cons standlrt girl boysch girlsch, /// level2(school: cons standlrt, residuals(u)) /// level1(student: cons) nopause nogroup * Display the correlation between the random intercepts and the random * slopes display [RP2]cov(cons\standlrt)/sqrt([RP2]var(cons)*[RP2]var(standlrt)) * Save the estimation results under the name model3 estimates store model3 * Performs a likelihood-ratio test to compare this model to the previous * random intercept model lrtest model2 model3 * Display a table of model results for model2 and model3 estimates table model2 model3, /// stats(N deviance ll) b(%4.3f) stfmt(%4.0f) varwidth(18) * Generate predicted age 16 scores based only on age 11 scores and school * attended generate ypred = [FP1]cons + [FP1]standlrt*standlrt + u0 + u1*standlrt * Sort the data into ascending order based on school and then on standlrt * within each school sort school standlrt * Plot the predicted age 16 scores against age 11 scores line ypred standlrt, connect(a) * Generate the predicted between-school variance for each student generate lev2var = [RP2]var(cons) + 2*[RP2]cov(cons\standlrt)*standlrt /// + [RP2]var(standlrt)*standlrt^2 * Plot the predicted between-school variance against age 11 scores line lev2var standlrt, sort **************************************************************************** * 3. BINARY RESPONSE MODELS **************************************************************************** * Load the Bangladeshi contraceptive use data into memory use "http://www.bristol.ac.uk/cmm/media/runmlwin/bang.dta", clear **************************************************************************** * 3.1 Variance components model **************************************************************************** * Fit a two-level women-within-districts variance components logit model by * PQL2 for women's contraceptive use runmlwin use cons, /// level2(district: cons) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons) pql2) /// nopause * Fit the previous model by MCMC using the PQL2 estimates from the previous * model as starting values for MCMC estimation. Model takes 12 seconds to * fit on our computer. runmlwin use cons, /// level2(district: cons) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons)) /// mcmc(on) initsprevious nopause nogroup * Display the odds of using contraception in the average district display exp([FP1]cons) * Display the probability of using contraception in the average district display exp([FP1]cons)/(1 + exp([FP1]cons)) * Display the district-level VPC and ICC display [RP2]var(cons)/([RP2]var(cons) + _pi^2/3) * Plot a trajectories (trace) plot of the MCMC chains mcmcsum, trajectories * Plot a fiveway MCMC graphical diagnostic plot mcmcsum [RP2]var(cons), fiveway * Display detailed MCMC summary statistics for the between-district variance mcmcsum [RP2]var(cons) **************************************************************************** * 3.2 Random intercept model with predictor variables **************************************************************************** * Generate a dummy variable for having one living child generate lc1 = (lc==1) * Generate a dummy variable for having two living children generate lc2 = (lc==2) * Generate a dummy variable for having three or more living children generate lc3plus = (lc==3) * Generate a dummy variable for having received a lower primary education gen edlprim = (educ==2) * Generate a dummy variable for having received an upper primary education gen eduprim = (educ==3) * Generate a dummy variable for having received a secondary education gen edsecplus = (educ==4) * Fit a two-level random intercept model for women's contraceptive use * adjusting for predictor variables by PQL2 quietly runmlwin use cons age lc1 lc2 lc3plus urban /// edlprim eduprim edsecplus hindu, /// level2(district: cons) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons) pql2) /// nopause * Fit the previous model by MCMC for a burnin of 1000 iterations followed * by a monitoring period of 10000 iterations. Model takes 72 seconds to fit * on our computer. runmlwin use cons age lc1 lc2 lc3plus urban /// edlprim eduprim edsecplus hindu, /// level2(district: cons, residuals(u)) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons)) /// mcmc(burnin(1000) chain(10000)) initsprevious nopause nogroup * Drop the variables u0 and u0se drop u0 u0se * Re-display the model results as odds ratios runmlwin, noheader noretable or **************************************************************************** * 3.3 Random slope models **************************************************************************** * Quietly fit a two-level random slope model for contraceptive use by PQL2 quietly runmlwin use cons age lc1 lc2 lc3plus urban /// edlprim eduprim edsecplus hindu, /// level2(district: cons urban) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons) pql2) /// nopause * Fit the previous model by MCMC using an orthogonal fixed effects * parameterisation to make the MCMC chains less autocorrelated. Model takes * 46 seconds to fit on our computer. runmlwin use cons age lc1 lc2 lc3plus urban /// edlprim eduprim edsecplus hindu, /// level2(district: cons urban, residuals(u)) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons)) /// mcmc(orthogonal) initsprevious nopause nogroup * Display the between-district variance for women living in rural areas display [RP2]var(cons) * Display the between-district variance for women living in urban areas display [RP2]var(cons) + 2*[RP2]cov(cons\urban) + [RP2]var(urban) * Rename the variable d_lit to dlit rename d_lit dlit * Rename the variable d_pray to dpray rename d_pray dpray * Quietly fit the previous model by PQL2 where we add in the two additional * predictor variables quietly runmlwin use cons age lc1 lc2 lc3plus urban /// edlprim eduprim edsecplus hindu dlit dpray, /// level2(district: cons urban) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons) pql2) /// nopause * Fit the previous model by MCMC. Model takes 52 seconds to fit on our * computer. runmlwin use cons age lc1 lc2 lc3plus urban /// edlprim eduprim edsecplus hindu dlit dpray, /// level2(district: cons urban) /// level1(woman:) /// discrete(distribution(binomial) link(logit) denom(cons)) /// mcmc(orthogonal) initsprevious nopause nogroup * Display the between-district variance for women living in rural areas display [RP2]var(cons) * Display the between-district variance for women living in urban areas display [RP2]var(cons) + 2*[RP2]cov(cons\urban) + [RP2]var(urban) **************************************************************************** * 4. MORE ADVANCED MODELS **************************************************************************** **************************************************************************** * 4.1 Multivariate response model **************************************************************************** * Load the tutorial data into memory use "http://www.bristol.ac.uk/cmm/media/runmlwin/tutorial.dta", clear * Generate a binary response variable equal to 1 if normexam is positive, * otherwise 0 gen pass = (normexam>0) * Quietly fit a two-level students-within-schools bivariate mixed response * type model to continuous age 11 scores and binary age 16 scores quietly runmlwin (standlrt cons girl, eq(1)) (pass cons girl, eq(2)), /// level2(school: (cons, eq(1)) (cons, eq(2))) /// level1(student: (cons, eq(1))) /// discrete(distribution(normal binomial) link(probit) denom(cons cons)) /// nopause * Fit the previous model by MCMC. Model takes 239 seconds to fit on our * computer. runmlwin (standlrt cons girl, eq(1)) (pass cons girl, eq(2)), /// level2(school: (cons, eq(1)) (cons, eq(2))) /// level1(student: (cons, eq(1))) /// discrete(distribution(normal binomial) link(probit) denom(cons cons)) /// mcmc(orthogonal) initsprevious /// nopause nogroup corr * Display the boy predicted probability of passing the age 16 exams display normal([FP2]cons_2) * Display the girl predicted probability of passing the age 16 exams display normal([FP2]cons_2 + [FP2]girl_2) **************************************************************************** * 4.2 Ordinal response model **************************************************************************** * Load the Bangladeshi contraceptive use data into memory use "http://www.bristol.ac.uk/cmm/media/runmlwin/bang.dta", clear * Define value labels for the variable use4 label define use4 1 "ster" 2 "mod" 3 "trad" 4 "none", modify * Assign these value labels to the variable use4 label values use4 use4 * Generate a dummy variable for having one living child generate lc1 = (lc==1) * Generate a dummy variable for having two living children generate lc2 = (lc==2) * Generate a dummy variable for having three or more living children generate lc3plus = (lc==3) * Fit a two-level women-within-districts ordinal logit model by MQL1 for * contraceptive use quietly runmlwin use4 cons (age lc1 lc2 lc3plus urban, contrast(1/3)), /// level2(district: (cons, contrast(1/3))) /// level1(woman:) /// discrete(dist(multinomial) link(ologit) denom(cons) basecategory(4)) /// nopause * Fit the previous model by MCMC. Model takes 62 seconds to fit on our * computer. runmlwin use4 cons (age lc1 lc2 lc3plus urban, contrast(1/3)), /// level2(district: (cons, contrast(1/3))) /// level1(woman:) /// discrete(dist(multinomial) link(ologit) denom(cons) basecategory(4)) /// mcmc(orthogonal) initsprevious /// nopause nogroup **************************************************************************** * 4.3 Nominal response model **************************************************************************** * Load the Bangladeshi contraceptive use data into memory use "http://www.bristol.ac.uk/cmm/media/runmlwin/bang.dta", clear * Define value labels for the variable use4 label define use4 1 "ster" 2 "mod" 3 "trad" 4 "none", modify * Assign these value labels to the variable use4 label values use4 use4 * Generate a dummy variable for having one living child generate lc1 = (lc==1) * Generate a dummy variable for having two living children generate lc2 = (lc==2) * Generate a dummy variable for having three or more living children generate lc3plus = (lc==3) * Fit a two-level women-within-districts nominal logit model by MQL1 for * contraceptive use quietly runmlwin use4 cons age lc1 lc2 lc3plus urban, /// level2(district: cons) /// level1(woman:) /// discrete(dist(multinomial) link(mlogit) denom(cons) basecategory(4)) /// nopause * Fit the previous model by MCMC. Model takes 171 seconds to fit on our * computer. runmlwin use4 cons age lc1 lc2 lc3plus urban, /// level2(district: cons, residuals(u)) /// level1(woman:) /// discrete(dist(multinomial) link(mlogit) denom(cons) basecategory(4)) /// mcmc(orthogonal) initsprevious /// nopause nogroup * Generate the numerator of the predicted probability expression for the * predicted probability that use4=1 as a function of age and urban, but * holding the number of living children constant at 2 and holding district * unobservables constant at zero generate n1 = /// exp([FP1]cons_1 + [FP1]age_1*age + [FP1]lc2_1 + [FP1]urban_1*urban) * Generate the numerator of the predicted probability expression for the * predicted probability that use4=2 as a function of age and urban, but * holding the number of living children constant at 2 and holding district * unobservables constant at zero generate n2 = /// exp([FP2]cons_2 + [FP2]age_2*age + [FP2]lc2_2 + [FP2]urban_2*urban) * Generate the numerator of the predicted probability expression for the * predicted probability that use4=3 as a function of age and urban, but * holding the number of living children constant at 2 and holding district * unobservables constant at zero generate n3 = /// exp([FP3]cons_3 + [FP3]age_3*age + [FP3]lc2_3 + [FP3]urban_3*urban) * Generate the denominator of the predicted probability expression generate d = (1 + n1 + n2 + n3) * Generate the predicted probability that use4=1 generate p1 = n1/d * Generate the predicted probability that use4=2 generate p2 = n3/d * Generate the predicted probability that use4=3 generate p3 = n2/d * Generate the predicted probability that use4=4 generate p4 = 1 - p1 - p2 - p3 * Generate a new variable actualage which stores the actual age of each * woman generate actualage = age + 30 * Define value labels for the variable urban label define urban 0 "Rural" 1 "Urban" * Assign these value labels to the variable urban label values urban urban * Plot the predicted probabilities against age twoway /// (line p4 actualage, sort) /// (line p3 actualage, sort lpattern(dash)) /// (line p2 actualage, sort lpattern(longdash)) /// (line p1 actualage, sort lpattern(vshortdash)), by(urban, note("")) /// ytitle("Probability") xtitle("Woman's age (years)") /// ylabel(0(0.25)1, format(%3.2f) angle(0)) xlabel(15(5)50) /// legend(order( /// 1 "No contraception" /// 2 "Traditional method" /// 3 "Modern method" /// 4 "Sterilisation" /// )) * Drop the variables n1, n2, n3, d, p1, p2, p3 and p4 drop n? d p? * Generate the numerator of the predicted probability expression for the * predicted probability that use4=1 as a function of number of living * children, urban and district, but holding age constant at 30 generate n1 = exp([FP1]cons_1 + [FP1]lc1_1*lc1 + [FP1]lc2_1*lc2 /// + [FP1]lc3plus_1*lc3plus + [FP1]urban_1*urban + u0) * Generate the numerator of the predicted probability expression for the * predicted probability that use4=2 as a function of number of living * children, urban and district, but holding age constant at 30 generate n2 = exp([FP2]cons_2 + [FP2]lc1_2*lc1 + [FP2]lc2_2*lc2 /// + [FP2]lc3plus_2*lc3plus + [FP2]urban_2*urban + u1) * Generate the numerator of the predicted probability expression for the * predicted probability that use4=3 as a function of number of living * children, urban and district, but holding age constant at 30 generate n3 = exp([FP3]cons_3 + [FP3]lc1_3*lc1 + [FP3]lc2_3*lc2 /// + [FP3]lc3plus_3*lc3plus + [FP3]urban_3*urban + u2) * Generate the denominator of the predicted probability expression generate d = (1 + n1 + n2 + n3) * Generate the predicted probability that use4=1 generate p1 = n1/d * Generate the predicted probability that use4=2 generate p2 = n3/d * Generate the predicted probability that use4=3 generate p3 = n2/d * Generate the predicted probability that use4=4 generate p4 = 1 - p1 - p2 - p3 * Generate a new variable lc_p4 equal to lc minus 0.15 generate lc_p4 = lc - 0.15 * Generate a new variable lc_p4 equal to lc minus 0.075 generate lc_p3 = lc - 0.075 * Generate a new variable lc_p4 equal to lc plus 0.075 generate lc_p2 = lc + 0.075 * Generate a new variable lc_p4 equal to lc plus 0.15 generate lc_p1 = lc + 0.15 * Plot the predicted probabilities against number of living children twoway /// (scatter p4 lc_p4, sort msymbol(Oh)) /// (scatter p3 lc_p3, sort msymbol(Dh)) /// (scatter p2 lc_p2, sort msymbol(Th)) /// (scatter p1 lc_p1, sort msymbol(Sh)), by(urban, note("")) /// ytitle("Probability") xtitle("Number of living children") /// xscale(range(-0.25 3.25)) /// ylabel(0(0.25)1, format(%3.2f) angle(0)) /// legend(order( /// 1 "No contraception" /// 2 "Traditional method" /// 3 "Modern method" /// 4 "Sterilisation" /// )) **************************************************************************** exit