attachment:day1-script.R of StatsCourses

   1 ###########################################################
   2 # 
   3 # Below, empty lines correspond to slide breaks in the 
   4 # presentation. I've also marked the major sections breaks.
   5 #
   6 # For questions, contact Florian Jaeger, 
   7 #                tiflo@bcs.rochester.edu
   8 #
   9 # Also consider, subscribing to R-lang, an email list for
  10 # language researchers working in R 
  11 # (http://pidgin.ucsd.edu/mailman/listinfo/r-lang)
  12 #
  13 # you can get help on any command by typing ?command into
  14 # the R GUI.
  15 #
  16 # Lines of the script are executed with CTRL-R (Windows) or
  17 # APPLE-ENTER (Mac OS)
  18 #
  19 ###########################################################
  20 
  21 ## --------------------------------------------------------
  22 # Functions (run first)
  23 ## --------------------------------------------------------
  24 center <- function(x) { return(x - mean(x, na.rm=T)) }
  25 
  26 ## --------------------------------------------------------
  27 # Change to your working directory
  28 ## --------------------------------------------------------
  29 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\McGill10\\figs")
  30 
  31 ## --------------------------------------------------------
  32 # Lexical decision time data
  33 ## --------------------------------------------------------
  34 library(languageR)
  35 head(lexdec[,c(1,2,5,10,11)])
  36 
  37 ?lexdec
  38 summary(lexdec)
  39 str(lexdec)
  40 names(lexdec)
  41 
  42 
  43 ## --------------------------------------------------------
  44 # Ordinary Linear Regression - An example
  45 ## --------------------------------------------------------
  46 l = glm(RT ~ 1 + Frequency, data=lexdec, 
  47 	family="gaussian")
  48 summary(l)
  49 sqrt(summary(l)[["dispersion"]])
  50 
  51 ## --------------------------------------------------------
  52 # Ordinary Linear Regression - Geometric intuitions
  53 ## --------------------------------------------------------
  54 library(languageR)
  55 library(Design)
  56 
  57 l.lexdec0 = lm(RT ~ 1, data=lexdec)
  58 summary(l.lexdec0)
  59 plot(x=lexdec$RT, 
  60 	xlab="Case Index", 
  61 	ylab="Response latency (in log-transformed msecs)", 
  62 	main="Predicting Lexical Decision RTs")
  63 abline(coef(l.lexdec0)[1], 0, col="orange", lwd=3)
  64 
  65 l.lexdec1 = lm(RT ~ 1 + Frequency, data=lexdec)
  66 plot(x=lexdec$Frequency, y=lexdec$RT, 
  67 	xlab="Word Frequency (log-transformed)", 
  68 	ylab="Response latency (in log-transformed msecs)", 
  69 	main="Predicting Lexical Decision RTs")
  70 abline(coef(l.lexdec1), col="orange", lwd=3)
  71 
  72 plot(x=exp(lexdec$Frequency), y=exp(lexdec$RT), 
  73 	xlab="Word Frequency", 
  74 	ylab="Response latency (in msecs)", 
  75 	main="Predicting Lexical Decision RTs")
  76 curve(exp(coef(l.lexdec1)[1] + 
  77 	coef(l.lexdec1)[2]*log(x)), 
  78 	col="orange", lwd=3, add=T)
  79 # see also the plot.Design function
  80 # for which you have to fit the model with ols()
  81 
  82 library(scatterplot3d)
  83 l.lexdec2 = lm(RT ~ 1 + Frequency + FamilySize, data=lexdec)
  84 summary(l.lexdec2)
  85 
  86 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize, 
  87 	z=lexdec$RT,
  88 	xlab="Word Frequency (log-transformed)", 
  89 	ylab="Number of morph. family members (log-transformed)",
  90 	zlab="Response latency (in log-transformed msecs)", 
  91 	main="Predicting Lexical Decision RTs")
  92 s$plane3d(l.lexdec2, col= "orange", lwd=2, lty.box = "solid")
  93 
  94 l.lexdec3 = lm(RT ~ 1 + Frequency + FamilySize + 
  95 	NativeLanguage, data=lexdec)
  96 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize, 
  97 	z=lexdec$RT,
  98 	xlab="Word Frequency (log-transformed)", 
  99 	ylab="Number of morph. family members (log-transformed)",
 100 	zlab="Response latency (in log-transformed msecs)", 
 101 	main="Predicting Lexical Decision RTs",
 102 	sub="Native Speakers (red) and\nNon-Native Speakers (orange)")
 103 s$plane3d(coef(l.lexdec3)[1], 
 104 	coef(l.lexdec3)[2], coef(l.lexdec3)[3], 
 105 	col= "red", lwd=2, 
 106 	lty.box = "solid")
 107 s$plane3d(coef(l.lexdec3)[1] + coef(l.lexdec3)[4], 
 108 	coef(l.lexdec3)[2], coef(l.lexdec3)[3], 
 109 	col= "orange", lwd=2, 
 110 	lty.box = "solid")
 111 
 112 l.lexdec4 = lm(RT ~ 1 + Frequency + FamilySize + 
 113 	NativeLanguage + NativeLanguage:Frequency, 
 114 	data=lexdec)
 115 summary(l.lexdec4)
 116 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize, 
 117 	z=lexdec$RT,
 118 	xlab="Word Frequency (log-transformed)", 
 119 	ylab="Number of morph. family members (log-transformed)",
 120 	zlab="Response latency (in log-transformed msecs)", 
 121 	main="Predicting Lexical Decision RTs",
 122 	sub="Interaction with Native Speakers (red) and\nNon-Native Speakers (orange)")
 123 s$plane3d(coef(l.lexdec4)[1], 
 124 	coef(l.lexdec4)[2], coef(l.lexdec4)[3], 
 125 	col= "red", lwd=2, 
 126 	lty.box = "solid")
 127 s$plane3d(coef(l.lexdec4)[1] + coef(l.lexdec4)[4], 
 128 	coef(l.lexdec4)[2] + coef(l.lexdec4)[5], 
 129 	coef(l.lexdec4)[3], 
 130 	col= "orange", lwd=2, 
 131 	lty.box = "solid")
 132 
 133 ## --------------------------------------------------------
 134 # Random Effects
 135 ## --------------------------------------------------------
 136 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\PennState\\figs")
 137 library(languageR)
 138 
 139 o = exp(as.vector(tapply(lexdec$RT, lexdec$Subject, mean)))
 140 plot(o, type = "n", xlab = "Subject Index", 
 141 	ylab = "Response latency in msecs", 
 142 	main = "Subject as random effect")
 143 text(o, as.character(unique(lexdec$Subject)), col = "blue")
 144 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
 145 
 146 lmer.lexdec0 = lmer(RT ~ 1 + Frequency + 
 147 	(1 | Subject), data=lexdec)
 148 
 149 cor(fitted(lmer.lexdec0), lexdec$RT)^2
 150 
 151 pvals.fnc(lmer.lexdec0, nsim = 10000)
 152 
 153 ranef(lmer.lexdec0)
 154 
 155 p = exp(as.vector(unlist(coef(lme.lexdec0)$Subject)))
 156 text(p, as.character(unique(lexdec$Subject)), col = "red")
 157 legend(x=2, y=850, legend=c("Predicted", "Observed"), 
 158 	col=c("blue", "red"), pch=1)
 159 
 160 lmer.lexdec1 = lmer(RT ~ 1 + (1 | Subject) + (1 | Word), 
 161 	data=lexdec)
 162 o = exp(as.vector(tapply(lexdec$RT, lexdec$Word, mean, na.rm=T)))
 163 p = exp(as.vector(unlist(coef(lmer.lexdec1)$Word)))
 164 plot(o, type = "n", xlab = "Word Index", 
 165 	ylab = "Response latency in msecs", 
 166 	main = "Word as random effect")
 167 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
 168 text(o, as.character(unique(lexdec$Word)), col = "blue")
 169 segments(x0=1:length(o), y0=o, x1=1:length(p), 
 170 	y1=p, col="red", lwd=2)
 171 points(p, pch=ifelse(p > o, "^", "v"), col="red", lwd=2)
 172 
 173 lmer.lexdec2 = lmer(RT ~ 1 + Frequency +
 174 	(1 | Subject) + (0 + Frequency | Subject) + 
 175 	(1 | Word), 
 176 	data=lexdec)
 177 
 178 ranef(lmer.lexdec2)
 179 
 180 lmer.lexdec3 = lmer(RT ~ 1 + Frequency +
 181 	(1 + Frequency | Subject) + (1 | Word), 
 182 	data=lexdec)
 183 
 184 plot(ranef(lmer.lexdec3)$Subject, 
 185 	main="Random Effect Correlation")
 186 
 187 
 188 # for all models
 189 dotplot(ranef(lmer.lexdec3))
 190 qqmath(ranef(lmer.lexdec3))
 191 
 192 # currently only for models with relatively
 193 # simple random effects structures
 194 dotplot(ranef(lmer.lexdec3, postVar=TRUE))
 195 dotplot(ranef(lmer.lexdec3, postVar=TRUE), 
 196 	scales = list(x = 
 197 		list(relation = 'free')))[["Subject"]]
 198 
 199 qqmath(ranef(lmer.lexdec3, postVar=TRUE))
 200 
 201 # To see what the postVar=TRUE option does, see ?ranef, or look at the
 202 # code returned by getMethod(ranef, "mer").  There are also some examples
 203 # on the ranef help page.
 204 
 205 anova(lmer.lexdec3, lmer.lexdec2)
 206 anova(lmer.lexdec3, lmer.lexdec3.simple = 
 207 	lmer(RT ~ 1 + Frequency +
 208 	(1 | Subject) + (1 | Word), 
 209 	data=lexdec)
 210 )
 211 
 212 # center variables 
 213 lexdec$cNativeEnglish <- center(ifelse(
 214 	lexdec$NativeLanguage == "English", 1,0))
 215 lexdec$cFrequency <- center(lexdec$Frequency)
 216 lexdec$cFamilySize <- center(lexdec$FamilySize)
 217 lexdec$cSynsetCount <- center(lexdec$SynsetCount)
 218 lexdec$cPlant <- center(ifelse(
 219 	lexdec$Class == "plant", 1,0))
 220 
 221 lmer.lexdec4 = lmer(RT ~ 1 + cNativeEnglish * (
 222 	cFrequency + cFamilySize +	cSynsetCount + 
 223 	cPlant) + 
 224 	(1 + Frequency | Subject) + (1 | Word), 
 225 	data=lexdec)
 226 print(lmer.lexdec4, corr=F)
 227 
 228 # for printing with plotLMER.fnc, let's use the
 229 # uncentered variables.
 230 lmer.lexdec4b = lmer(RT ~ 1 + NativeLanguage * (
 231 	Frequency + FamilySize + SynsetCount + 
 232 	Class) + 
 233 	(1 | Subject) + (1 | Word), 
 234 	data=lexdec)
 235 p.lmer.lexdec4b = pvals.fnc(lmer.lexdec4b, 
 236 	nsim=10000, withMCMC=T)
 237 p.lmer.lexdec
 238 plotLMER.fnc(lmer.lexdec4b, pred="Frequency", 
 239 	mcmcMat=p.lmer.lexdec4b$mcmc,
 240 	intr=list("NativeLanguage", c("English", "Other"),
 241 		"end", list(c("red", "orange"), 
 242 		c(1,1))), cex=1.3, lwd=2)		
 243 
 244 ## --------------------------------------------------------
 245 # Ordinary Logistic Regression - An example
 246 ## --------------------------------------------------------
 247 library(languageR)
 248 lmer.lexdec.answer4 = lmer(Correct == "correct" ~ 
 249 	1 + cNativeEnglish * (
 250 	cFrequency + cFamilySize +	cSynsetCount + 
 251 	cPlant) + 
 252 	(1 + Frequency | Subject) + (1 | Word), 
 253 	data=lexdec, family="binomial")
 254 
 255 lmer.lexdec.answer4b = lmer(Correct == "correct" ~ 
 256 	1 + NativeLanguage * (
 257 	Frequency + FamilySize + SynsetCount + 
 258 	Class) + 
 259 	(1 + Frequency | Subject) + (1 | Word), 
 260 	data=lexdec, family="binomial")
 261 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency", 
 262 	intr=list("NativeLanguage", c("English", "Other"),
 263 		"end", list(c("red", "orange"), 
 264 		c(1,1))), cex=1.3, lwd=2, fun=I)	
 265 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency", 
 266 	intr=list("NativeLanguage", c("English", "Other"),
 267 		"end", list(c("red", "orange"), 
 268 		c(1,1))), cex=1.3, lwd=2)		
 269 
 270 
 271 
 272 
 273 
 274 setwd("C:\\Documents and Settings\\florian\\My Documents\\_Papers\\GillespieJaeger_OCP\\data\\data")
 275 
 276 d= as.data.frame(read.csv(file="results.txt", sep="\t",
 277 	header=F))
 278 names(d) = c('Sequence', 'CountThat', 'CountNoThat')
 279 
 280 d$ProportionThat = d$CountThat / (d$CountThat + d$CountNoThat)
 281 d$HeadNoun = as.factor(sub("^([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
 282 d$RcDet = as.factor(sub("^[a-zA-Z]+\\sthat\\s([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
 283 d$RcNoun = as.factor(sub("^.*\\s([a-zA-Z]+)\\s[a-zA-Z]+$", "\\1", d$Sequence))
 284 d$RcVerb = as.factor(sub("^.*\\s([a-zA-Z]+)$", "\\1", d$Sequence))
 285 
 286 poisson.ocp1 = glm(CountThat ~ RcDet, data= d, family= "poisson")
 287 summary(poisson.ocp1)
 288 
 289 d.that = d
 290 d.that$That = 1
 291 d.nothat = d
 292 d.nothat$That = 0
 293 dd = rbind(d.that, d.nothat)
 294 rm(d.that, d.nothat)
 295 
 296 dd$Weight = ifelse(dd$That == 1, dd$CountThat, dd$CountNoThat)
 297 table(dd$That, is.na(dd$Weight))
 298 dd = subset(dd, !is.na(CountThat) & !is.na(CountNoThat))
 299 summary(dd$That == 1)
 300 dd$CountThat = NULL
 301 dd$CountNoThat = NULL
 302 dd$HeadNounCount = table(dd$HeadNoun)[as.character(dd$HeadNoun)]
 303 dd$RcNounCount = table(dd$RcNoun)[as.character(dd$RcNoun)]
 304 dd$RcVerbCount = table(dd$RcVerb)[as.character(dd$RcVerb)]
 305 
 306 library(Design)
 307 dd.dist = datadist(dd)
 308 options(datadist = 'dd.dist')
 309 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weights= Weight, 
 310 	subset= RcDet %in% c('this','that'))
 311 
 312 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weight= Weight)
 313 plot(logistic.ocp1, xlab= "Determiner at RC onset", 
 314 	ylab="log-odds of relativizer")
 315 plot(logistic.ocp1, xlab= "Determiner at RC onset", 
 316 	ylab="probability of relativizer", fun= plogis)
 317 
 318 library(lme4)
 319 dd.sub = subset(dd, HeadNounCount > 10 &
 320 	RcNounCount > 10)
 321 nrow(dd.sub)
 322 lmer.ocp1 = lmer(That ~ RcDet + (1 + RcDet | RcNoun) + (1 | HeadNoun), data= dd.sub, 
 323 	subset= RcDet %in% c('this','that'),
 324 	weights= Weight, family="binomial")
Attachment 'day1-script.R'

Attached Files