attachment:LSA13-Lecture4-plyr-reshape.R of LSA2013Regression

Attachment 'LSA13-Lecture4-plyr-reshape.R'

   1 ## @knitr preamble, echo=FALSE, results='hide', cache=FALSE, message=F
   2 library(plyr)
   3 library(reshape2)
   4 
   5 library(ggplot2)
   6 library(stringr)
   7 
   8 # set knitr chunk option defaults for this document
   9 opts_chunk$set(results='markup', echo=TRUE, cache=T, autodep=T, warning=F, message=F, prompt=F,
  10                fig.width=4, fig.height=4, out.width='0.5\\textwidth', fig.pos='!h', fig.lp='fig:',
  11                tidy=F, size='scriptsize')
  12 
  13 data(lexdec, package='languageR')
  14 
  15 # set ggplot theme globally
  16 theme_set(theme_bw())
  17 
  18 
  19 
  20 ## @knitr 
  21 freqEffects <- ddply(lexdec, .(Subject), function(df) {coef(lm(RT~Frequency, data=df))})
  22 
  23 
  24 ## @knitr echo=F
  25 head(freqEffects)
  26 
  27 
  28 ## @knitr lexdec-freq-effect-bysub, echo=F, out.width='0.9\\textwidth'
  29 ggplot(lexdec, aes(x=Frequency, y=RT)) +
  30   geom_point() +
  31   facet_wrap(~Subject) + 
  32   geom_abline(data=freqEffects, aes(slope=Frequency, intercept=`(Intercept)`), color='red')
  33 
  34 
  35 ## @knitr 
  36 mean(runif(100))
  37 
  38 
  39 ## @knitr 
  40 f <- mean
  41 f(runif(100))
  42 
  43 
  44 ## @knitr 
  45 function(x) sum(x)/length(x)
  46 
  47 
  48 ## @knitr 
  49 mymean <- function(x) sum(x)/length(x)
  50 mymean(runif(100))
  51 mymean(runif(100, 1, 2))
  52 
  53 
  54 ## @knitr 
  55 (function(x) sum(x)/length(x)) (runif(100))
  56 
  57 
  58 ## @knitr 
  59 make.power <- function(n) {
  60   return(function(x) x^n)
  61 }
  62 my.square <- make.power(2)
  63 my.square(3)
  64 (make.power(4)) (2)
  65 
  66 
  67 ## @knitr 
  68 sd(rnorm(sd=5, mean=1, 100))
  69 
  70 
  71 ## @knitr 
  72 rnorm
  73 
  74 
  75 ## @knitr 
  76 list.o.nums <- list(runif(100), rnorm(100), rpois(100, lambda=1))
  77 lapply(list.o.nums, mean)
  78 
  79 
  80 ## @knitr 
  81 sapply(1:10, function(n) rpois(5, lambda=n))
  82 
  83 
  84 ## @knitr 
  85 sapply(1:10, function(n) mean(rnorm(n=5, mean=0, sd=1)))
  86 
  87 
  88 ## @knitr 
  89 data(lexdec, package='languageR')
  90 RT.bysub <- with(lexdec, split(RT, Subject))
  91 RT.means.bysub <- sapply(RT.bysub, mean)
  92 head(data.frame(RT.mean=RT.means.bysub))
  93 
  94 
  95 ## @knitr 
  96 library(plyr) 
  97 head(ddply(lexdec, .(Subject), function(df) data.frame(RT.mean=mean(df$RT)))) 
  98 
  99 
 100 ## @knitr eval=F
 101 ddply(lexdec, .(Subject), function(df) data.frame(RT.mean=mean(df$RT)))
 102 
 103 
 104 ## @knitr ddply-output-exercises, eval=F
 105 library(plyr)
 106 data(lexdec, package='languageR')       # load the dataset if it isn't already
 107 ddply(lexdec, .(PrevType, Class), function(df) with(df, data.frame(meanRT=mean(RT))))
 108 
 109 
 110 ## @knitr 
 111 slowestTrials <- ddply(lexdec, .(Subject), subset, RT==max(RT))
 112 head(slowestTrials[, c('Subject', 'RT', 'Trial', 'Word')])
 113 
 114 
 115 ## @knitr eval=FALSE
 116 ## ddply(lexdec, .(Subject), function(df, ...) subset(df, ...), RT==max(RT))
 117 ## ddply(lexdec, .(Subject), function(df) subset(df, RT==max(RT)))
 118 
 119 
 120 ## @knitr eval=F
 121 ddply(lexdec, .(Subject), function(df) {
 122   df$RT.s <- scale(df$RT)
 123   return(df)
 124 })
 125 
 126 
 127 ## @knitr 
 128 lexdecScaledRT <- ddply(lexdec, .(Subject), transform, RT.s=scale(RT))
 129 
 130 
 131 ## @knitr summarise-RT-dist-plots, out.width='0.45\\textwidth', echo=FALSE, results='hide'
 132 ggplot(lexdecScaledRT, aes(x=RT, group=Subject)) + geom_density()
 133 ggplot(lexdecScaledRT, aes(x=RT.s, group=Subject)) + geom_density()
 134 
 135 
 136 ## @knitr eval=FALSE
 137 ## summarise(.data, summVar1=expr1, summVar2=expr2, ...)
 138 
 139 
 140 ## @knitr 
 141 lexdec.RTsumm <- ddply(lexdec, .(Subject), summarise, mean=mean(RT), var=var(RT))
 142 head(lexdec.RTsumm)
 143 
 144 
 145 ## @knitr eval=F
 146 ddply(lexdec, .(Subject), function(df) with(df, data.frame(meanRT=mean(RT))))
 147 
 148 
 149 ## @knitr 
 150 lexdec <- ddply(lexdec, .(Subject), transform, RTrank=rank(RT))
 151 
 152 
 153 word.rt.ranks <- ddply(lexdec, .(Word, Frequency), summarise,
 154                        RTrank=mean(RTrank),
 155                        RTrank25=quantile(RTrank, 0.25), RTrank75=quantile(RTrank, 0.75))
 156 
 157 
 158 ## @knitr out.width='0.9\\textwidth'
 159 ggplot(word.rt.ranks, aes(x=Frequency, y=RTrank, ymin=RTrank25, ymax=RTrank75)) +
 160   geom_pointrange()
 161 
 162 
 163 ## @knitr 
 164 subset(word.rt.ranks,
 165        RTrank %in% c(max(RTrank), min(RTrank)))
 166 
 167 
 168 ## @knitr 
 169 ddply(lexdec, .(NativeLanguage), summarise, acc=mean(Correct=='correct'))
 170 
 171 
 172 ## @knitr 
 173 daply(lexdec, .(NativeLanguage, Correct), nrow)
 174 
 175 
 176 ## @knitr 
 177 correctCounts <- daply(lexdec, .(NativeLanguage, Correct), nrow)
 178 chisq.test(correctCounts)
 179 
 180 
 181 ## @knitr 
 182 subjectSlopes <- ddply(lexdec, .(Subject), function(df) {coef(lm(RT~Frequency, data=df))})
 183 
 184 
 185 ## @knitr 
 186 summary(subjectSlopes$Frequency)
 187 
 188 
 189 ## @knitr subject-slopes-rt
 190 ggplot(lexdec, aes(x=Frequency, y=RT)) +
 191   geom_point() +
 192   facet_wrap(~Subject) + 
 193   geom_abline(data=subjectSlopes, aes(slope=Frequency, intercept=`(Intercept)`), color='red')
 194 
 195 
 196 ## @knitr 
 197 lexdec <- transform(lexdec,
 198                     FreqHiLo=factor(ifelse(Frequency>median(Frequency), 
 199                                            'high', 'low'),
 200                                     levels=c('low', 'high')))
 201 
 202 
 203 ## @knitr results='hide', echo=FALSE
 204 # this is necessary because of something strange in how knitr/plyr are
 205 # using environments
 206 se <- function(x) sd(x)/sqrt(length(x))
 207 
 208 
 209 ## @knitr 
 210 se <- function(x) sd(x)/sqrt(length(x))
 211 langfreq.summ <- ddply(lexdec,
 212                        .(NativeLanguage, FreqHiLo),
 213                        summarise, mean=mean(RT), se=se(RT))
 214 
 215 
 216 ## @knitr fig.width=6, out.width='0.75\\textwidth'
 217 ggplot(langfreq.summ, aes(x=FreqHiLo, color=NativeLanguage,
 218                           y=mean, ymin=mean-1.96*se, ymax=mean+1.96*se)) +
 219   geom_point() +
 220   geom_errorbar(width=0.1) + 
 221   geom_line(aes(group=NativeLanguage))
 222 
 223 
 224 ## @knitr 
 225 langfreq.bysub <- ddply(lexdec, .(NativeLanguage, FreqHiLo, Subject),
 226                         summarise, RT=mean(RT))
 227 
 228 
 229 ## @knitr 
 230 langfreq.bysub.summ <- ddply(langfreq.bysub,
 231                              .(NativeLanguage, FreqHiLo),
 232                              summarise, mean=mean(RT), se=se(RT))
 233 
 234 
 235 ## @knitr by-subject-errorbars, fig.width=6, out.width='0.75\\textwidth'
 236 ggplot(langfreq.bysub.summ, aes(x=FreqHiLo, color=NativeLanguage,
 237                                 y=mean, ymin=mean-1.96*se, ymax=mean+1.96*se)) +
 238   geom_point() +
 239   geom_errorbar(width=0.1) + 
 240   geom_line(aes(group=NativeLanguage))
 241 
 242 
 243 ## @knitr 
 244 langfreq.byitem <- ddply(lexdec, .(NativeLanguage, FreqHiLo, Word), summarise, RT=mean(RT))
 245 langfreq.byitem.summ <- ddply(langfreq.byitem,
 246                              .(NativeLanguage, FreqHiLo),
 247                              summarise, mean=mean(RT), se=se(RT))
 248 
 249 ## ggplot(langfreq.byitem.summ, aes(x=FreqHiLo, color=NativeLanguage,
 250 ##                           y=mean, ymin=mean-2*se, ymax=mean+2*se)) +
 251 ##   geom_point() +
 252 ##   geom_errorbar(width=0.1) + 
 253 ##   geom_line(aes(group=NativeLanguage))
 254 
 255 
 256 ## @knitr echo=F, results='hide', fig.show='hide'
 257 library(plyr)
 258 library(mvtnorm)
 259 library(lme4)
 260 
 261 make.data.generator <- function(true.effects=c(0,0),
 262                                 resid.var=1,
 263                                 ranef.var=diag(c(1,1)),
 264                                 n.subj=24,
 265                                 n.obs=24
 266                                 ) 
 267 {   
 268   # create design matrix for our made up experiment
 269   data.str <- data.frame(freq=factor(c(rep('high', n.obs/2), rep('low', n.obs/2))))
 270   contrasts(data.str$freq) <- contr.sum(2)
 271   model.mat <- model.matrix(~ 1 + freq, data.str)
 272   
 273   generate.data <- function() {
 274     # sample data set
 275     simulated.data <- rdply(n.subj, {
 276       beta <- t(rmvnorm(n=1, sigma=ranef.var)) + true.effects
 277       expected.RT <- model.mat %*% beta
 278       epsilon <- rnorm(n=length(expected.RT), mean=0, sd=sqrt(resid.var))
 279       data.frame(data.str,
 280                  RT=expected.RT + epsilon)
 281     })
 282     names(simulated.data)[1] <- 'subject'
 283     simulated.data
 284   }
 285 }
 286 
 287 fit.models <- function(simulated.data) {
 288   # fit models and extract coefs
 289   lm.coefs <- coefficients(summary(lm(RT ~ 1+freq, simulated.data)))[, 1:3]
 290   rand.int.coefs <- summary(lmer(RT ~ 1+freq + (1|subject), simulated.data))@coefs
 291   rand.slope.coefs <- summary(lmer(RT ~ 1+freq + (1+freq|subject), simulated.data))@coefs
 292   # format output all pretty
 293   rbind(data.frame(model='lm', predictor=rownames(lm.coefs), lm.coefs),
 294         data.frame(model='rand.int', predictor=rownames(rand.int.coefs), rand.int.coefs),
 295         data.frame(model='rand.slope', predictor=rownames(rand.slope.coefs), rand.slope.coefs))
 296 }
 297 
 298 
 299 gen.dat <- make.data.generator()
 300 
 301 simulations <- rdply(.n=100,
 302                      fit.models(gen.dat()),
 303                      .progress='text')
 304 
 305 head(simulations, n=20)
 306 sim.m <- melt(simulations, measure.var=c('Estimate', 'Std..Error', 't.value'))
 307 head(sim.m)
 308 
 309 ddply(simulations, .(model, predictor), summarise, type1err=mean(abs(t.value)>1.96))
 310 
 311 
 312 
 313 ## @knitr data-simulation-model-fitting-preview, fig.width=5, fig.height=3.5, out.width='\\textwidth', echo=F, results='hide'
 314 
 315 ggplot(simulations, aes(x=t.value, color=model)) + 
 316   geom_vline(xintercept=c(-1.96, 1.96), color='#888888', linetype=3)  +
 317   scale_x_continuous('t value') + 
 318   geom_density() +
 319   facet_grid(predictor~.)
 320 
 321 
 322 
 323 ## @knitr eval=F
 324 library(plyr)
 325 library(mvtnorm)
 326 library(lme4)
 327 
 328 make.data.generator <- function(true.effects=c(0,0),
 329                                 resid.var=1,
 330                                 ranef.var=diag(c(1,1)),
 331                                 n.subj=24,
 332                                 n.obs=24
 333                                 ) 
 334 {   
 335   # create design matrix for our made up experiment
 336   data.str <- data.frame(freq=factor(c(rep('high', n.obs/2), rep('low', n.obs/2))))
 337   contrasts(data.str$freq) <- contr.sum(2)
 338   model.mat <- model.matrix(~ 1 + freq, data.str)
 339   
 340   generate.data <- function() {
 341     # sample data set under mixed effects model with random slope/intercepts
 342     simulated.data <- rdply(n.subj, {
 343       beta <- t(rmvnorm(n=1, sigma=ranef.var)) + true.effects
 344       expected.RT <- model.mat %*% beta
 345       epsilon <- rnorm(n=length(expected.RT), mean=0, sd=sqrt(resid.var))
 346       data.frame(data.str,
 347                  RT=expected.RT + epsilon)
 348     })
 349     names(simulated.data)[1] <- 'subject'
 350     simulated.data
 351   }
 352 }
 353 
 354 
 355 ## @knitr eval=F
 356 fit.models <- function(simulated.data) {
 357   # fit models and extract coefs
 358   lm.coefs <- coefficients(summary(lm(RT ~ 1+freq, simulated.data)))[, 1:3]
 359   rand.int.coefs <- summary(lmer(RT ~ 1+freq + (1|subject), simulated.data))@coefs
 360   rand.slope.coefs <- summary(lmer(RT ~ 1+freq + (1+freq|subject), simulated.data))@coefs
 361   # format output all pretty
 362   rbind(data.frame(model='lm', predictor=rownames(lm.coefs), lm.coefs),
 363         data.frame(model='rand.int', predictor=rownames(rand.int.coefs), rand.int.coefs),
 364         data.frame(model='rand.slope', predictor=rownames(rand.slope.coefs), rand.slope.coefs))
 365 }
 366 
 367 
 368 ## @knitr eval=F
 369 gen.dat <- make.data.generator()
 370 simulations <- rdply(.n=100,
 371                      fit.models(gen.dat()),
 372                      .progress='text')
 373 
 374 
 375 ## @knitr 
 376 head(simulations)
 377 daply(simulations, .(model, predictor), function(df) type1err=mean(abs(df$t.value)>1.96))
 378 
 379 
 380 ## @knitr fig.width=5, fig.height=3.5, out.width='0.75\\textwidth'
 381 # use reshape2::melt to get the data into a more convenient format (see next section)
 382 ggplot(simulations, aes(x=t.value, color=model)) +
 383   geom_vline(xintercept=c(-1.96, 1.96), color='#888888', linetype=3) + 
 384   scale_x_continuous('t value') + 
 385   geom_density() +
 386   facet_grid(predictor~.)
 387 
 388 
 389 ## @knitr 
 390 head(params <- expand.grid(n.obs=c(4, 16, 64), n.subj=c(4, 16, 64)))
 391 
 392 
 393 ## @knitr eval=F
 394 man.simulations <- mdply(params, function(...) {
 395                            make.data <- make.data.generator(...)
 396                            rdply(.n=100, fit.models(make.data()))
 397                          }, .progress='text')
 398 
 399 
 400 ## @knitr eval=F
 401 mdply(params, function(...) {
 402   make.data <- make.data.generator(...)
 403   rdply(.n=100, fit.models(make.data()))
 404 }, .progress='text')
 405 
 406 
 407 ## @knitr eval=FALSE
 408 ## ddply(params, .(n.obs, n.subj), function(df) {
 409 ##   make.data <- make.data.generator(n.obs=df$n.obs, n.subj=df$n.subj)
 410 ##   rdply(.n=100, .fun=fit.models(make.data()))
 411 ## }, .progress='text')
 412 
 413 
 414 ## @knitr echo=F
 415 ld.wide[1:7, 1:7]
 416 
 417 
 418 ## @knitr echo=F
 419 ld.long[1:5, ]
 420 
 421 
 422 ## @knitr echo=F
 423 ld.wide[1:5, 1:4]
 424 
 425 
 426 ## @knitr 
 427 ld.wide[1:5, 1:7]
 428 
 429 
 430 ## @knitr 
 431 head(ld.m <- melt(ld.wide, id.var='Subject', na.rm=T))
 432 
 433 
 434 ## @knitr 
 435 require(stringr)
 436 trials.and.vars <- ldply(str_split(ld.m$variable, '_'))
 437 names(trials.and.vars) <- c('Trial', 'measure')
 438 
 439 
 440 ## @knitr 
 441 head(ld.m <- cbind(ld.m, trials.and.vars))
 442 
 443 
 444 ## @knitr 
 445 head(dcast(ld.m, Subject+Trial ~ measure))
 446 
 447 
 448 ## @knitr 
 449 head(dcast(ld.m, ... ~ measure))
 450 
 451 
 452 ## @knitr 
 453 ld.m$variable <- NULL
 454 head(dcast(ld.m, ... ~ measure))
 455 
 456 
 457 ## @knitr 
 458 head(dcast(ld.m, Subject ~ measure))
 459 
 460 
 461 ## @knitr results='hide'
 462 melt(ld.wide, id.var='Subject', na.rm=T)
 463 
 464 
 465 ## @knitr results='hide'
 466 ddply(ld.wide, .(Subject), function(df) {
 467   vars <- names(df)
 468   vals <- t(df)
 469   dimnames(vals) <- NULL
 470   return(subset(data.frame(variable=vars, value=vals),
 471                 variable != 'Subject' & !is.na(value)))
 472 })
 473 
 474 
 475 ## @knitr results='hide'
 476 head(dcast(ld.m, Subject+Trial ~ measure))
 477 
 478 
 479 ## @knitr 
 480 head(ddply(ld.m, .(Subject, Trial), function(df) {
 481   res <- data.frame(t(df$value))
 482   names(res) <- df$measure
 483   return(res)
 484 }))

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

[get | view] (2021-04-22 12:55:37, 1045.4 KB) [[attachment:LSA13-AuxLecture 1&2 - Coding.pdf]]
[get | view] (2021-04-22 12:55:37, 10.7 KB) [[attachment:LSA13-AuxLecture5-TimeSeriesData-eye-tracking-analysis.R]]
[get | view] (2021-04-22 12:55:37, 378.0 KB) [[attachment:LSA13-AuxLecture5-TimeSeriesData.pdf]]
[get | view] (2021-04-22 12:55:37, 735.1 KB) [[attachment:LSA13-CourseOverview.pdf]]
[get | view] (2021-04-22 12:55:37, 8.6 KB) [[attachment:LSA13-Lecture1-GLM.R]]
[get | view] (2021-04-22 12:55:37, 2879.2 KB) [[attachment:LSA13-Lecture1-GLM.pdf]]
[get | view] (2021-04-22 12:55:37, 7.0 KB) [[attachment:LSA13-Lecture2-GLMM.R]]
[get | view] (2021-04-22 12:55:37, 1337.3 KB) [[attachment:LSA13-Lecture2-GLMM.pdf]]
[get | view] (2021-04-22 12:55:37, 1011.4 KB) [[attachment:LSA13-Lecture3-BeyondLinearModels.pdf]]
[get | view] (2021-04-22 12:55:37, 13.0 KB) [[attachment:LSA13-Lecture4-plyr-reshape.R]]
[get | view] (2021-04-22 12:55:37, 467.8 KB) [[attachment:LSA13-Lecture4-plyr-reshape.pdf]]
[get | view] (2021-04-22 12:55:37, 13.8 KB) [[attachment:LSA13-Lecture5-ggplot.R]]
[get | view] (2021-04-22 12:55:37, 2058.0 KB) [[attachment:LSA13-Lecture5-ggplot.pdf]]
[get | view] (2021-04-22 12:55:37, 2920.0 KB) [[attachment:LSA13-Lecture6-CommonIssuesAndSolutions.pdf]]
[get | view] (2021-04-22 12:55:37, 1581.0 KB) [[attachment:LSA13-Lecture6-Reporting.pdf]]
[get | view] (2021-04-22 12:55:37, 3.7 KB) [[attachment:LSA13-PreLecture2-ProblemSet.R]]
[get | view] (2021-04-22 12:55:37, 608.5 KB) [[attachment:LSA13-PreLecture2-ProblemSet.pdf]]
[get | view] (2021-04-22 12:55:37, 145.1 KB) [[attachment:data.zip]]
[get | view] (2021-04-22 12:55:37, 1284.6 KB) [[attachment:scripts.zip]]

All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.