Attachment 'session7.R'
Download 1 ## Session 5: Multilevel logit mixed models
2 library(languageR)
3 library(Design)
4 data(dative)
5
6 ## Initial exploration
7 ?dative
8 str(dative)
9
10 ## give ourselves an indicator variable for the dependent variable
11 dative$is.PP <- ifelse(dative$RealizationOfRecipient == "PP", 1, 0)
12
13 ## calculate the proportion of PP realizations for every combination
14 ## of categorical predictors. also, take the mean of each continuous
15 ## predictor for each combination of categorical predictors
16 ## NB: also check out smeans.cl.norm and smeans.cl.boot
17 d.prop <- with(dative,
18 summarize(Cbind(is.PP, LengthOfRecipient, LengthOfTheme),
19 by = llist(AccessOfTheme,
20 AccessOfRec,
21 AnimacyOfRec,
22 AnimacyOfTheme,
23 PronomOfTheme,
24 DefinOfTheme,
25 SemanticClass,
26 Modality,
27 Verb),
28 FUN= function (x) {
29 x <- as.data.frame(x)
30 c(mean(x$is.PP), count = sum(x$is.PP), total = length(x$is.PP),
31 meanThemeLength = mean(x$LengthOfTheme),
32 meanRecLength = mean(x$LengthOfRecipient))
33 }, stat.name="prop"))
34 dd.prop <- datadist(d.prop)
35 options(datadist="dd.prop")
36
37 ###### linear model over proportions
38 d.ols <- ols(prop ~ AccessOfTheme +
39 AccessOfRec +
40 meanRecLength +
41 AnimacyOfRec +
42 AnimacyOfTheme +
43 PronomOfTheme +
44 DefinOfTheme +
45 meanThemeLength +
46 SemanticClass +
47 Modality, d.prop,
48 weights = 1/total,
49 x=TRUE, y=TRUE)
50
51 ## Bootstrapping with random cluster replacement
52 d.ols.boot <- bootcov(d.ols,
53 cluster=d.prop$Verb,
54 coef.reps=TRUE) # Try bootstrapping without
55 # the cluster argument. Will
56 # the variance on parameter
57 # estimates be larger or
58 # smaller?
59
60 ## Test assumptions
61 dev.new()
62 plot(calibrate(d.ols)) # Try increasing the B argument.
63 par(mfrow = c(4,3)) # Try turning on recording in
64 # your graphing window, and
65 # then setting mfrow back to
66 # 1, 1 before you run this
67 # plotting command.
68 bootplot(d.ols.boot, which = 1:10, what="qq")
69 par(mfrow = c(4,3))
70 bootplot(d.ols.boot, which = 1:10, what="density")
71 ## Is there anything else you can think of to test?
72
73 ## Test predictors
74 validate(d.ols, bw=TRUE) # Try increasing the B argument
75 anova(d.ols)
76 anova(d.ols.boot)
77 fastbw(d.ols)
78
79 dev.new()
80 plot(anova(d.ols))
81 plot(summary(d.ols)) # Which of these has less
82 # variance in the parameter
83 # estimates? Why?
84 plot(summary(d.ols.boot))
85 ## Do you want to test for any interactions?
86
87 ## Visualize results
88 dev.new()
89 par(mfrow = c(4,3)) # Turn on recording in your
90 # graphing window, and try
91 # these plotting commands with
92 # mfrow set to 1, 1
93 plot(d.ols)
94 par(mfrow = c(4,3))
95 plot(d.ols.boot)
96 ## Are any of the effects in a surprising direction? Does the numeric
97 ## range of the model's predictions make sense?
98
99 ## (can also generate arbitrary confidence intervarls)
100 ## d.ols.pred <- predict(d.ols, d.prop, type="x")
101 ## confplot(d.ols.boot, X=d.ols.pred)
102
103
104
105 ###### Logistic regression model
106 dd.dative <- datadist(dative)
107 options(datadist = 'dd.dative')
108
109 d.lrm <- lrm(is.PP ~
110 AccessOfTheme +
111 AccessOfRec +
112 LengthOfRecipient +
113 AnimacyOfRec +
114 AnimacyOfTheme +
115 PronomOfTheme +
116 DefinOfTheme +
117 LengthOfTheme +
118 SemanticClass +
119 Modality,
120 data = dative,
121 x=TRUE, y=TRUE)
122
123 ## Test assumptions
124 dev.new()
125 plot(calibrate(d.lrm))
126 par(mfrow = c(4,3))
127 bootplot(d.lrm.boot, which = 1:10, what="qq")
128 par(mfrow = c(4,3))
129 bootplot(d.lrm.boot, which = 1:10, what="density")
130 ## other tests?
131
132 ## Test predictors
133 validate(d.lrm, bw=TRUE)
134 anova(d.lrm)
135 anova(d.lrm.boot)
136 fastbw(d.lrm)
137
138 dev.new()
139 plot(anova(d.lrm))
140 plot(summary(d.lrm))
141 plot(summary(d.lrm.boot))
142 ## any factors you want to remove from the model? interactions you
143 ## want to add? discretization? transformations? what units are the
144 ## coefficients in? can we put the coefficients back on the original
145 ## scale? How would we interpret the coefficient on a log-transformed
146 ## independent variable in a logistic regression?
147
148 ## Visualize results
149 dev.new()
150 par(mfrow = c(4,3))
151 plot(d.lrm)
152 par(mfrow = c(4,3))
153 plot(d.lrm, fun=plogis)
154 par(mfrow = c(4,3))
155 plot(d.lrm.boot)
156 par(mfrow = c(4,3))
157 plot(d.lrm.boot, fun=plogis)
158 ## How do we interpret trends on the logit transformed data? On
159 ## probabilities? Should confidence intervals be symmetric in log
160 ## odds space? in proportion space? Why do we see a value for all n
161 ## levels of a categorical factor, when the model only reports
162 ## parameter estimates for n-1 levels?
163
164 ###### Mixed logit model
165 ###### Full model with random effect of verb (item effects)
166 ## Should we edit the model so that it only includes the effects we
167 ## expect to be significant based on previous analyses?
168 d.glmer1 <- lmer(is.PP ~
169 AccessOfTheme +
170 AccessOfRec +
171 LengthOfRecipient +
172 AnimacyOfRec +
173 AnimacyOfTheme +
174 PronomOfTheme +
175 DefinOfTheme +
176 LengthOfTheme +
177 SemanticClass +
178 Modality +
179 (1 | Verb),
180 data = dative, family = "binomial")
181
182 ## Use MCMC sampling to generate high posterior density intervals
183 glmm1.mcmc <- pvals.fnc(d.glmer1, withMCMC=TRUE)
184 dev.new()
185 plot(glmm1.mcmc$mcmc) # What do we learn here?
186 ## Are there any parameters whose estimates are very unstable? Is
187 ## this consistent with previous analyses?
188
189 ## Plot parameter estimates with HPD95 intervals
190 library(ggplot2)
191 fixed <- data.frame(names = names(fixef(d.glmer1)),
192 b = as.numeric(fixef(d.glmer1)),
193 l = as.numeric(glmm1.mcmc$fixed$HPD95lower),
194 u = as.numeric(glmm1.mcmc$fixed$HPD95upper))
195
196 ## Coefficient plot with HPD intervals
197 dev.new()
198 g <- ggplot(fixed, aes(x=names, y=b, colour=names))
199 g +
200 geom_hline(intercept=0, colour="black") +
201 geom_errorbar(width=.2, aes(x=names, y=b, min=l, max=u)) +
202 geom_point() + # Try to change this into a
203 # bar plot instead of a dot
204 # plot
205 scale_x_discrete(name=" ", labels=" ") +
206 scale_y_continuous(name="Change in log odds")
207 ## Are HPD intervals symmetric? Should we use the values from the
208 ## regression or from the MCMC sampling for our parameter estimates?
209 ## Could we use 1.96 * S.E. for each parameter as a confidence
210 ## interval? Do we expect the variance on parameter estimates to be
211 ## lower or higher than straight logistic regression?
212
213 ## Plot predicted effects of each input
214 dev.new()
215 par(mfrow=c(4,3))
216 plotLMER.fnc(d.glmer1, mcmcMat = glmm1.mcmc$mcmc)
217 par(mfrow=c(4,3))
218 plotLMER.fnc(d.glmer1, mcmcMat = glmm1.mcmc$mcmc, fun=plogis)
219 par(mfrow=c(1,1))
220 ## How do these compare to the bootstrapped model with cluster
221 ## replacement? Should we include any slope terms for particular
222 ## verbs?
223
224 ## Dealing with underdispersion
225
226 ## Q: The scale factor in poisson and binomial GLMMs is
227 ## variance/mean. Variance of a binomial distribution is np(1-p),
228 ## which should always be less than the mean, np. This would
229 ## guarantee underdispersion, but I know overdispersion can happen.
230 ## Does anyone know how the scale parameter is derived for a binomial
231 ## distribution?
232 d.glmer1.q <- lmer(is.PP ~
233 AccessOfTheme +
234 AccessOfRec +
235 LengthOfRecipient +
236 AnimacyOfRec +
237 AnimacyOfTheme +
238 PronomOfTheme +
239 DefinOfTheme +
240 LengthOfTheme +
241 SemanticClass +
242 Modality +
243 (1 | Verb),
244 data = dative, family = "quasibinomial")
245
246
247 ###### Mixed logit model with crossed random effects
248 ## Full model with random effects for verbs (item effects) and random
249 ## effects for subjects
250 d.glmer2 <- lmer(is.PP ~
251 AccessOfTheme +
252 AccessOfRec +
253 LengthOfRecipient +
254 AnimacyOfRec +
255 AnimacyOfTheme +
256 PronomOfTheme +
257 DefinOfTheme +
258 LengthOfTheme +
259 SemanticClass +
260 (1 | Verb) +
261 (1 | Speaker),
262 data = dative[!is.na(dative$Speaker),],
263 family = "binomial")
264
265 ## Use MCMC sampling to generate high posterior density intervals
266 glmm2.mcmc <- pvals.fnc(d.glmer2, withMCMC=TRUE)
267 dev.new()
268 plot(glmm2.mcmc$mcmc)
269
270 ## Plot parameter estimates with HPD95 intervals
271 fixed <- data.frame(names = names(fixef(d.glmer2)),
272 b = as.numeric(fixef(d.glmer2)),
273 l = as.numeric(glmm2.mcmc$fixed$HPD95lower),
274 u = as.numeric(glmm2.mcmc$fixed$HPD95upper))
275
276 ## Coefficient plot with HPD intervals
277 g <- ggplot(fixed, aes(x=names, y=b, colour=names))
278 g +
279 geom_hline(intercept=0, colour="black") +
280 geom_errorbar(width=.2, aes(x=names, y=b, min=l, max=u)) +
281 geom_point() +
282 scale_x_discrete(name=" ", labels=" ") +
283 scale_y_continuous(name="Change in log odds")
284
285 ## Plot predicted effects of each input
286 par(mfrow=c(4,3))
287 plotLMER.fnc(d.glmer2, mcmcMat = glmm2.mcmc$mcmc)
288 par(mfrow=c(4,3))
289 plotLMER.fnc(d.glmer2, mcmcMat = glmm2.mcmc$mcmc, fun=plogis)
290 par(mfrow=c(1,1))
291
292 ## Dealing with underdispersion
293 d.glmer2.q <- lmer(is.PP ~
294 AccessOfTheme +
295 AccessOfRec +
296 LengthOfRecipient +
297 AnimacyOfRec +
298 AnimacyOfTheme +
299 PronomOfTheme +
300 DefinOfTheme +
301 LengthOfTheme +
302 SemanticClass +
303 (1 | Verb) +
304 (1 | Speaker),
305 data = dative[!is.na(dative$Speaker),],
306 family = "quasibinomial")
307
308
309
310 ###### Work in progress...
311 ## xyplot(is.PP ~ LengthOfTheme,
312 ## dative,
313 ## type = "p",
314 ## col = alpha("blue", .5),
315 ## ylim = c(0,1),
316 ## panel = function (x, y, ...) {
317 ## xs = drop(d.glmer1@X)
318 ## fixed = as.matrix(fixef(d.glmer1))
319 ## preds = unlist(dimnames(xs)[2])
320 ## medians = matrix(rep(apply(xs, 2, median), length(x)), ncol=npred, byrow=TRUE)
321 ## npred = length(fixed)
322 ## pred.ind = which(preds == "LengthOfTheme")
323 ## if (pred.ind == npred) {
324 ## panel.curve(plogis(cbind(medians[,1:(pred.ind-1)], x) %*% fixed))
325 ## }
326 ## else {
327 ## print(length(x))
328 ## print(dim(cbind(medians[,1:(pred.ind-1)], x, medians[,(pred.ind+1):npred])))
329 ## print(dim(fixed))
330 ## print(dim(cbind(medians[,1:(pred.ind-1)], x, medians[,(pred.ind+1):npred]) %*% fixed))
331 ## panel.curve(plogis(cbind(medians[,1:(pred.ind-1)], x, medians[,(pred.ind+1):npred]) %*% fixed))
332 ## }
333 ## panel.stripplot(x, y, jitter=TRUE, ...)
334 ## })
335
336 ###### Simulate the model
337 ## copied from slot(getMethods("simulate"), "methods")$mer
338 ## my.simulate <- function (object, nsim = 1, seed = NULL, ...)
339 ## {
340 ## if (!exists(".Random.seed", envir = .GlobalEnv))
341 ## runif(1)
342 ## if (is.null(seed))
343 ## RNGstate <- .Random.seed
344 ## else {
345 ## R.seed <- .Random.seed
346 ## set.seed(seed)
347 ## RNGstate <- structure(seed, kind = as.list(RNGkind()))
348 ## on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
349 ## }
350 ## stopifnot((nsim <- as.integer(nsim[1])) > 0, is(object, "lmer"))
351 ## lpred <- .Call(lme4:::mer_simulate, object, nsim)
352 ## sc <- abs(object@devComp[8])
353 ## lpred <- lpred + drop(object@X %*% fixef(object))
354 ## n <- prod(dim(lpred))
355 ## fam <- attr(object, "family")
356 ## if (fam$family == "binomial") {
357 ## summary(lpred)
358 ## response <- as.data.frame(matrix(byrow = FALSE, ncol = nsim,
359 ## rbinom(n, 1, fam$linkinv(lpred))))
360 ## attr(response, "seed") <- RNGstate
361 ## return(response)
362 ## }
363 ## }
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.