attachment:session7.R of HLPMiniCourseSession5

Attachment 'session7.R'

   1 ## Session 5:  Multilevel logit mixed models
   2 library(languageR)
   3 library(Design)
   4 data(dative)
   5 
   6 ## Initial exploration
   7 ?dative
   8 str(dative)
   9 
  10 ## give ourselves an indicator variable for the dependent variable
  11 dative$is.PP <- ifelse(dative$RealizationOfRecipient == "PP", 1, 0)
  12 
  13 ## calculate the proportion of PP realizations for every combination
  14 ## of categorical predictors.  also, take the mean of each continuous
  15 ## predictor for each combination of categorical predictors
  16 ## NB:  also check out smeans.cl.norm and smeans.cl.boot
  17 d.prop <- with(dative,
  18                summarize(Cbind(is.PP, LengthOfRecipient, LengthOfTheme),
  19                          by = llist(AccessOfTheme, 
  20                            AccessOfRec,
  21                            AnimacyOfRec,
  22                            AnimacyOfTheme,
  23                            PronomOfTheme,
  24                            DefinOfTheme,
  25                            SemanticClass,
  26                            Modality,
  27                            Verb),
  28                          FUN= function (x) {
  29                            x <- as.data.frame(x)
  30                            c(mean(x$is.PP), count = sum(x$is.PP), total = length(x$is.PP),
  31                              meanThemeLength = mean(x$LengthOfTheme),
  32                              meanRecLength = mean(x$LengthOfRecipient))
  33                          }, stat.name="prop"))
  34 dd.prop <- datadist(d.prop)
  35 options(datadist="dd.prop")
  36 
  37 ###### linear model over proportions
  38 d.ols <- ols(prop ~ AccessOfTheme + 
  39              AccessOfRec +
  40              meanRecLength +
  41              AnimacyOfRec +
  42              AnimacyOfTheme +
  43              PronomOfTheme +
  44              DefinOfTheme +
  45              meanThemeLength +
  46              SemanticClass +
  47              Modality, d.prop,
  48              weights = 1/total,
  49              x=TRUE, y=TRUE)
  50 
  51 ## Bootstrapping with random cluster replacement
  52 d.ols.boot <- bootcov(d.ols,
  53                       cluster=d.prop$Verb, 
  54                       coef.reps=TRUE)   # Try bootstrapping without
  55                                         # the cluster argument.  Will
  56                                         # the variance on parameter
  57                                         # estimates be larger or
  58                                         # smaller?
  59 
  60 ## Test assumptions
  61 dev.new()
  62 plot(calibrate(d.ols))                  # Try increasing the B argument.
  63 par(mfrow = c(4,3))                     # Try turning on recording in
  64                                         # your graphing window, and
  65                                         # then setting mfrow back to
  66                                         # 1, 1 before you run this
  67                                         # plotting command.
  68 bootplot(d.ols.boot, which = 1:10, what="qq")
  69 par(mfrow = c(4,3))
  70 bootplot(d.ols.boot, which = 1:10, what="density")
  71 ## Is there anything else you can think of to test?
  72 
  73 ## Test predictors
  74 validate(d.ols, bw=TRUE)                # Try increasing the B argument
  75 anova(d.ols)
  76 anova(d.ols.boot)
  77 fastbw(d.ols)
  78 
  79 dev.new()
  80 plot(anova(d.ols))
  81 plot(summary(d.ols))                    # Which of these has less
  82                                         # variance in the parameter
  83                                         # estimates?  Why?
  84 plot(summary(d.ols.boot))
  85 ## Do you want to test for any interactions?
  86 
  87 ## Visualize results
  88 dev.new()
  89 par(mfrow = c(4,3))                     # Turn on recording in your
  90                                         # graphing window, and try
  91                                         # these plotting commands with
  92                                         # mfrow set to 1, 1
  93 plot(d.ols)
  94 par(mfrow = c(4,3))
  95 plot(d.ols.boot)
  96 ## Are any of the effects in a surprising direction?  Does the numeric
  97 ## range of the model's predictions make sense?
  98 
  99 ## (can also generate arbitrary confidence intervarls)
 100 ## d.ols.pred <- predict(d.ols, d.prop, type="x")
 101 ## confplot(d.ols.boot, X=d.ols.pred)
 102 
 103 
 104 
 105 ###### Logistic regression model
 106 dd.dative <- datadist(dative)
 107 options(datadist = 'dd.dative')
 108 
 109 d.lrm <- lrm(is.PP ~ 
 110              AccessOfTheme +
 111              AccessOfRec +
 112              LengthOfRecipient +
 113              AnimacyOfRec +
 114              AnimacyOfTheme +
 115              PronomOfTheme +
 116              DefinOfTheme +
 117              LengthOfTheme +
 118              SemanticClass +
 119              Modality,
 120              data = dative,
 121              x=TRUE, y=TRUE)
 122 
 123 ## Test assumptions
 124 dev.new()
 125 plot(calibrate(d.lrm))
 126 par(mfrow = c(4,3))
 127 bootplot(d.lrm.boot, which = 1:10, what="qq")
 128 par(mfrow = c(4,3))
 129 bootplot(d.lrm.boot, which = 1:10, what="density")
 130 ## other tests?
 131 
 132 ## Test predictors
 133 validate(d.lrm, bw=TRUE)
 134 anova(d.lrm)
 135 anova(d.lrm.boot)
 136 fastbw(d.lrm)
 137 
 138 dev.new()
 139 plot(anova(d.lrm))
 140 plot(summary(d.lrm))
 141 plot(summary(d.lrm.boot))
 142 ## any factors you want to remove from the model?  interactions you
 143 ## want to add?  discretization?  transformations?  what units are the
 144 ## coefficients in?  can we put the coefficients back on the original
 145 ## scale?  How would we interpret the coefficient on a log-transformed
 146 ## independent variable in a logistic regression?
 147 
 148 ## Visualize results
 149 dev.new()
 150 par(mfrow = c(4,3))
 151 plot(d.lrm)
 152 par(mfrow = c(4,3))
 153 plot(d.lrm, fun=plogis)
 154 par(mfrow = c(4,3))
 155 plot(d.lrm.boot)
 156 par(mfrow = c(4,3))
 157 plot(d.lrm.boot, fun=plogis)
 158 ## How do we interpret trends on the logit transformed data?  On
 159 ## probabilities?  Should confidence intervals be symmetric in log
 160 ## odds space?  in proportion space?  Why do we see a value for all n
 161 ## levels of a categorical factor, when the model only reports
 162 ## parameter estimates for n-1 levels?
 163 
 164 ###### Mixed logit model
 165 ###### Full model with random effect of verb (item effects)
 166 ## Should we edit the model so that it only includes the effects we
 167 ## expect to be significant based on previous analyses?
 168 d.glmer1 <- lmer(is.PP ~
 169                  AccessOfTheme + 
 170                  AccessOfRec +
 171                  LengthOfRecipient +
 172                  AnimacyOfRec +
 173                  AnimacyOfTheme +
 174                  PronomOfTheme +
 175                  DefinOfTheme +
 176                  LengthOfTheme +
 177                  SemanticClass +
 178                  Modality +
 179                  (1 | Verb), 
 180                  data = dative, family = "binomial")
 181 
 182 ## Use MCMC sampling to generate high posterior density intervals
 183 glmm1.mcmc <- pvals.fnc(d.glmer1, withMCMC=TRUE)
 184 dev.new()
 185 plot(glmm1.mcmc$mcmc)                   # What do we learn here?
 186 ## Are there any parameters whose estimates are very unstable?  Is
 187 ## this consistent with previous analyses?
 188 
 189 ## Plot parameter estimates with HPD95 intervals
 190 library(ggplot2)
 191 fixed <- data.frame(names = names(fixef(d.glmer1)),
 192                     b = as.numeric(fixef(d.glmer1)),
 193                     l = as.numeric(glmm1.mcmc$fixed$HPD95lower),
 194                     u = as.numeric(glmm1.mcmc$fixed$HPD95upper))
 195 
 196 ## Coefficient plot with HPD intervals
 197 dev.new()
 198 g <- ggplot(fixed, aes(x=names, y=b, colour=names))
 199 g +
 200   geom_hline(intercept=0, colour="black") +
 201   geom_errorbar(width=.2, aes(x=names, y=b, min=l, max=u)) +
 202   geom_point() +                        # Try to change this into a
 203                                         # bar plot instead of a dot
 204                                         # plot
 205   scale_x_discrete(name=" ", labels=" ") +
 206   scale_y_continuous(name="Change in log odds")
 207 ## Are HPD intervals symmetric?  Should we use the values from the
 208 ## regression or from the MCMC sampling for our parameter estimates?
 209 ## Could we use 1.96 * S.E. for each parameter as a confidence
 210 ## interval?  Do we expect the variance on parameter estimates to be
 211 ## lower or higher than straight logistic regression?  
 212 
 213 ## Plot predicted effects of each input
 214 dev.new()
 215 par(mfrow=c(4,3))
 216 plotLMER.fnc(d.glmer1, mcmcMat = glmm1.mcmc$mcmc)
 217 par(mfrow=c(4,3))
 218 plotLMER.fnc(d.glmer1, mcmcMat = glmm1.mcmc$mcmc, fun=plogis)
 219 par(mfrow=c(1,1))
 220 ## How do these compare to the bootstrapped model with cluster
 221 ## replacement?  Should we include any slope terms for particular
 222 ## verbs?
 223 
 224 ## Dealing with underdispersion
 225 
 226 ## Q:  The scale factor in poisson and binomial GLMMs is
 227 ## variance/mean.  Variance of a binomial distribution is np(1-p),
 228 ## which should always be less than the mean, np.  This would
 229 ## guarantee underdispersion, but I know overdispersion can happen.
 230 ## Does anyone know how the scale parameter is derived for a binomial
 231 ## distribution?
 232 d.glmer1.q <- lmer(is.PP ~
 233                    AccessOfTheme + 
 234                    AccessOfRec +
 235                    LengthOfRecipient +
 236                    AnimacyOfRec +
 237                    AnimacyOfTheme +
 238                    PronomOfTheme +
 239                    DefinOfTheme +
 240                    LengthOfTheme +
 241                    SemanticClass +
 242                    Modality +
 243                    (1 | Verb), 
 244                    data = dative, family = "quasibinomial")
 245 
 246 
 247 ###### Mixed logit model with crossed random effects
 248 ## Full model with random effects for verbs (item effects) and random
 249 ## effects for subjects
 250 d.glmer2 <- lmer(is.PP ~
 251                  AccessOfTheme + 
 252                  AccessOfRec +
 253                  LengthOfRecipient +
 254                  AnimacyOfRec +
 255                  AnimacyOfTheme +
 256                  PronomOfTheme +
 257                  DefinOfTheme +
 258                  LengthOfTheme +
 259                  SemanticClass +
 260                  (1 | Verb) +
 261                  (1 | Speaker), 
 262                  data = dative[!is.na(dative$Speaker),],
 263                  family = "binomial")
 264 
 265 ## Use MCMC sampling to generate high posterior density intervals
 266 glmm2.mcmc <- pvals.fnc(d.glmer2, withMCMC=TRUE)
 267 dev.new()
 268 plot(glmm2.mcmc$mcmc)
 269 
 270 ## Plot parameter estimates with HPD95 intervals
 271 fixed <- data.frame(names = names(fixef(d.glmer2)),
 272                     b = as.numeric(fixef(d.glmer2)),
 273                     l = as.numeric(glmm2.mcmc$fixed$HPD95lower),
 274                     u = as.numeric(glmm2.mcmc$fixed$HPD95upper))
 275 
 276 ## Coefficient plot with HPD intervals
 277 g <- ggplot(fixed, aes(x=names, y=b, colour=names))
 278 g +
 279   geom_hline(intercept=0, colour="black") +
 280   geom_errorbar(width=.2, aes(x=names, y=b, min=l, max=u)) +
 281   geom_point() +
 282   scale_x_discrete(name=" ", labels=" ") +
 283   scale_y_continuous(name="Change in log odds")
 284 
 285 ## Plot predicted effects of each input
 286 par(mfrow=c(4,3))
 287 plotLMER.fnc(d.glmer2, mcmcMat = glmm2.mcmc$mcmc)
 288 par(mfrow=c(4,3))
 289 plotLMER.fnc(d.glmer2, mcmcMat = glmm2.mcmc$mcmc, fun=plogis)
 290 par(mfrow=c(1,1))
 291 
 292 ## Dealing with underdispersion
 293 d.glmer2.q <- lmer(is.PP ~
 294                    AccessOfTheme + 
 295                    AccessOfRec +
 296                    LengthOfRecipient +
 297                    AnimacyOfRec +
 298                    AnimacyOfTheme +
 299                    PronomOfTheme +
 300                    DefinOfTheme +
 301                    LengthOfTheme +
 302                    SemanticClass +
 303                    (1 | Verb) +
 304                    (1 | Speaker), 
 305                    data = dative[!is.na(dative$Speaker),],
 306                    family = "quasibinomial")
 307 
 308 
 309 
 310 ###### Work in progress...
 311 ## xyplot(is.PP ~ LengthOfTheme,
 312 ##        dative, 
 313 ##        type = "p",
 314 ##        col = alpha("blue", .5),
 315 ##        ylim = c(0,1),
 316 ##        panel = function (x, y, ...) {
 317 ##          xs = drop(d.glmer1@X)
 318 ##          fixed = as.matrix(fixef(d.glmer1))
 319 ##          preds = unlist(dimnames(xs)[2])
 320 ##          medians = matrix(rep(apply(xs, 2, median), length(x)), ncol=npred, byrow=TRUE)
 321 ##          npred = length(fixed)
 322 ##          pred.ind = which(preds == "LengthOfTheme")
 323 ##          if (pred.ind == npred) {
 324 ##            panel.curve(plogis(cbind(medians[,1:(pred.ind-1)], x) %*% fixed))
 325 ##          }
 326 ##          else {
 327 ##            print(length(x))
 328 ##            print(dim(cbind(medians[,1:(pred.ind-1)], x, medians[,(pred.ind+1):npred])))
 329 ##            print(dim(fixed))
 330 ##            print(dim(cbind(medians[,1:(pred.ind-1)], x, medians[,(pred.ind+1):npred]) %*% fixed))
 331 ##            panel.curve(plogis(cbind(medians[,1:(pred.ind-1)], x, medians[,(pred.ind+1):npred]) %*% fixed))
 332 ##          }
 333 ##          panel.stripplot(x, y, jitter=TRUE, ...)
 334 ##        })
 335 
 336 ###### Simulate the model
 337 ## copied from slot(getMethods("simulate"), "methods")$mer
 338 ## my.simulate <- function (object, nsim = 1, seed = NULL, ...) 
 339 ## {
 340 ##   if (!exists(".Random.seed", envir = .GlobalEnv)) 
 341 ##     runif(1)
 342 ##   if (is.null(seed)) 
 343 ##     RNGstate <- .Random.seed
 344 ##   else {
 345 ##     R.seed <- .Random.seed
 346 ##     set.seed(seed)
 347 ##     RNGstate <- structure(seed, kind = as.list(RNGkind()))
 348 ##     on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
 349 ##   }
 350 ##   stopifnot((nsim <- as.integer(nsim[1])) > 0, is(object, "lmer"))
 351 ##   lpred <- .Call(lme4:::mer_simulate, object, nsim)
 352 ##   sc <- abs(object@devComp[8])
 353 ##   lpred <- lpred + drop(object@X %*% fixef(object))
 354 ##   n <- prod(dim(lpred))
 355 ##   fam <- attr(object, "family")
 356 ##   if (fam$family == "binomial") {
 357 ##     summary(lpred)
 358 ##     response <- as.data.frame(matrix(byrow = FALSE, ncol = nsim, 
 359 ##                                      rbinom(n, 1, fam$linkinv(lpred))))
 360 ##     attr(response, "seed") <- RNGstate
 361 ##     return(response)
 362 ##   }
 363 ## }
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.