attachment:Code-Rochester-IntroToRegressionModels.R of Feb2011Regression

   1 ###########################################################
   2 # 
   3 # Below, empty lines correspond to slide breaks in the 
   4 # presentation. I've also marked the major sections breaks.
   5 #
   6 # For questions, contact Florian Jaeger, 
   7 #                tiflo@bcs.rochester.edu
   8 #
   9 # Also consider, subscribing to R-lang, an email list for
  10 # language researchers working in R 
  11 # (http://pidgin.ucsd.edu/mailman/listinfo/r-lang)
  12 #
  13 # you can get help on any command by typing ?command into
  14 # the R GUI.
  15 #
  16 # Lines of the script are executed with CTRL-R (Windows) or
  17 # APPLE-ENTER (Mac OS)
  18 #
  19 ###########################################################
  20 
  21 ## --------------------------------------------------------
  22 # Functions (run first)
  23 ## --------------------------------------------------------
  24 center <- function(x) { return(x - mean(x, na.rm=T)) }
  25 
  26 ## --------------------------------------------------------
  27 # Change to your working directory
  28 ## --------------------------------------------------------
  29 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\McGill10\\figs")
  30 
  31 ## --------------------------------------------------------
  32 # Lexical decision time data
  33 ## --------------------------------------------------------
  34 library(languageR)
  35 head(lexdec[,c(1,2,5,10,11)])
  36 
  37 ?lexdec
  38 summary(lexdec)
  39 str(lexdec)
  40 names(lexdec)
  41 
  42 
  43 ## --------------------------------------------------------
  44 # Ordinary Linear Regression - An example
  45 ## --------------------------------------------------------
  46 l = glm(RT ~ 1 + Frequency, data=lexdec, 
  47 	family="gaussian")
  48 summary(l)
  49 sqrt(summary(l)[["dispersion"]])
  50 
  51 ## --------------------------------------------------------
  52 # Ordinary Linear Regression - Geometric intuitions
  53 ## --------------------------------------------------------
  54 library(languageR)
  55 library(Design)
  56 
  57 l.lexdec0 = lm(RT ~ 1, data=lexdec)
  58 summary(l.lexdec0)
  59 plot(x=lexdec$RT, 
  60 	xlab="Case Index", 
  61 	ylab="Response latency (in log-transformed msecs)", 
  62 	main="Predicting Lexical Decision RTs")
  63 abline(coef(l.lexdec0)[1], 0, col="orange", lwd=3)
  64 
  65 l.lexdec1 = lm(RT ~ 1 + Frequency, data=lexdec)
  66 plot(x=lexdec$Frequency, y=lexdec$RT, 
  67 	xlab="Word Frequency (log-transformed)", 
  68 	ylab="Response latency (in log-transformed msecs)", 
  69 	main="Predicting Lexical Decision RTs")
  70 abline(coef(l.lexdec1), col="orange", lwd=3)
  71 
  72 plot(x=exp(lexdec$Frequency), y=exp(lexdec$RT), 
  73 	xlab="Word Frequency", 
  74 	ylab="Response latency (in msecs)", 
  75 	main="Predicting Lexical Decision RTs")
  76 curve(exp(coef(l.lexdec1)[1] + 
  77 	coef(l.lexdec1)[2]*log(x)), 
  78 	col="orange", lwd=3, add=T)
  79 # see also the plot.Design function
  80 # for which you have to fit the model with ols()
  81 
  82 library(scatterplot3d)
  83 l.lexdec2 = lm(RT ~ 1 + Frequency + FamilySize, data=lexdec)
  84 summary(l.lexdec2)
  85 
  86 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize, 
  87 	z=lexdec$RT,
  88 	xlab="Word Frequency (log-transformed)", 
  89 	ylab="Number of morph. family members (log-transformed)",
  90 	zlab="Response latency (in log-transformed msecs)", 
  91 	main="Predicting Lexical Decision RTs")
  92 s$plane3d(l.lexdec2, col= "orange", lwd=2, lty.box = "solid")
  93 
  94 l.lexdec3 = lm(RT ~ 1 + Frequency + FamilySize + 
  95 	NativeLanguage, data=lexdec)
  96 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize, 
  97 	z=lexdec$RT,
  98 	xlab="Word Frequency (log-transformed)", 
  99 	ylab="Number of morph. family members (log-transformed)",
 100 	zlab="Response latency (in log-transformed msecs)", 
 101 	main="Predicting Lexical Decision RTs",
 102 	sub="Native Speakers (red) and\nNon-Native Speakers (orange)")
 103 s$plane3d(coef(l.lexdec3)[1], 
 104 	coef(l.lexdec3)[2], coef(l.lexdec3)[3], 
 105 	col= "red", lwd=2, 
 106 	lty.box = "solid")
 107 s$plane3d(coef(l.lexdec3)[1] + coef(l.lexdec3)[4], 
 108 	coef(l.lexdec3)[2], coef(l.lexdec3)[3], 
 109 	col= "orange", lwd=2, 
 110 	lty.box = "solid")
 111 
 112 l.lexdec4 = lm(RT ~ 1 + Frequency + FamilySize + 
 113 	NativeLanguage + NativeLanguage:Frequency, 
 114 	data=lexdec)
 115 summary(l.lexdec4)
 116 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize, 
 117 	z=lexdec$RT,
 118 	xlab="Word Frequency (log-transformed)", 
 119 	ylab="Number of morph. family members (log-transformed)",
 120 	zlab="Response latency (in log-transformed msecs)", 
 121 	main="Predicting Lexical Decision RTs",
 122 	sub="Interaction with Native Speakers (red) and\nNon-Native Speakers (orange)")
 123 s$plane3d(coef(l.lexdec4)[1], 
 124 	coef(l.lexdec4)[2], coef(l.lexdec4)[3], 
 125 	col= "red", lwd=2, 
 126 	lty.box = "solid")
 127 s$plane3d(coef(l.lexdec4)[1] + coef(l.lexdec4)[4], 
 128 	coef(l.lexdec4)[2] + coef(l.lexdec4)[5], 
 129 	coef(l.lexdec4)[3], 
 130 	col= "orange", lwd=2, 
 131 	lty.box = "solid")
 132 
 133 ## --------------------------------------------------------
 134 # Random Effects
 135 ## --------------------------------------------------------
 136 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\PennState\\figs")
 137 library(languageR)
 138 
 139 o = exp(as.vector(tapply(lexdec$RT, lexdec$Subject, mean)))
 140 plot(o, type = "n", xlab = "Subject Index", 
 141 	ylab = "Response latency in msecs", 
 142 	main = "Subject as random effect")
 143 text(o, as.character(unique(lexdec$Subject)), col = "blue")
 144 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
 145 
 146 lmer.lexdec0 = lmer(RT ~ 1 + Frequency + 
 147 	(1 | Subject), data=lexdec)
 148 cor(fitted(lmer.lexdec0), lexdec$RT)^2
 149 pvals.fnc(lmer.lexdec0, nsim = 10000)
 150 ranef(lmer.lexdec0)
 151 
 152 lmer.lexdec0 = lmer(RT ~ 1 +	(1 | Subject), data=lexdec)
 153 
 154 p = exp(as.vector(unlist(coef(lmer.lexdec0)$Subject)))
 155 text(p, as.character(unique(lexdec$Subject)), col = "red")
 156 legend(x=2, y=850, legend=c("Observed","Predicted"), 
 157 	col=c("blue", "red"), pch=1)
 158 
 159 lmer.lexdec1 = lmer(RT ~ 1 + (1 | Subject) + (1 | Word), 
 160 	data=lexdec)
 161 o = exp(as.vector(tapply(lexdec$RT, lexdec$Word, mean, na.rm=T)))
 162 p = exp(as.vector(unlist(coef(lmer.lexdec1)$Word)))
 163 plot(o, type = "n", xlab = "Word Index", 
 164 	ylab = "Response latency in msecs", 
 165 	main = "Word as random effect")
 166 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
 167 text(o, as.character(unique(lexdec$Word)), col = "blue")
 168 segments(x0=1:length(o), y0=o, x1=1:length(p), 
 169 	y1=p, col="red", lwd=2)
 170 points(p, pch=ifelse(p > o, "^", "v"), col="red", lwd=2)
 171 
 172 lmer.lexdec2 = lmer(RT ~ 1 + Frequency +
 173 	(1 | Subject) + (0 + Frequency | Subject) + 
 174 	(1 | Word), 
 175 	data=lexdec)
 176 
 177 ranef(lmer.lexdec2)
 178 
 179 lmer.lexdec3 = lmer(RT ~ 1 + Frequency +
 180 	(1 + Frequency | Subject) + (1 | Word), 
 181 	data=lexdec)
 182 
 183 plot(ranef(lmer.lexdec3)$Subject, 
 184 	main="Random Effect Correlation")
 185 
 186 
 187 # for all models
 188 dotplot(ranef(lmer.lexdec3))
 189 qqmath(ranef(lmer.lexdec3))
 190 
 191 # currently only for models with relatively
 192 # simple random effects structures
 193 dotplot(ranef(lmer.lexdec3, postVar=TRUE))
 194 dotplot(ranef(lmer.lexdec3, postVar=TRUE), 
 195 	scales = list(x = 
 196 		list(relation = 'free')))[["Subject"]]
 197 
 198 qqmath(ranef(lmer.lexdec3, postVar=TRUE))
 199 
 200 # To see what the postVar=TRUE option does, see ?ranef, or look at the
 201 # code returned by getMethod(ranef, "mer").  There are also some examples
 202 # on the ranef help page.
 203 
 204 anova(lmer.lexdec3, lmer.lexdec2)
 205 anova(lmer.lexdec3, lmer.lexdec3.simple = 
 206 	lmer(RT ~ 1 + Frequency +
 207 	(1 | Subject) + (1 | Word), 
 208 	data=lexdec)
 209 )
 210 
 211 # center variables 
 212 lexdec$cNativeEnglish <- center(ifelse(
 213 	lexdec$NativeLanguage == "English", 1,0))
 214 lexdec$cFrequency <- center(lexdec$Frequency)
 215 lexdec$cFamilySize <- center(lexdec$FamilySize)
 216 lexdec$cSynsetCount <- center(lexdec$SynsetCount)
 217 lexdec$cPlant <- center(ifelse(
 218 	lexdec$Class == "plant", 1,0))
 219 
 220 lmer.lexdec4 = lmer(RT ~ 1 + cNativeEnglish * (
 221 	cFrequency + cFamilySize +	cSynsetCount + 
 222 	cPlant) + 
 223 	(1 + Frequency | Subject) + (1 | Word), 
 224 	data=lexdec)
 225 print(lmer.lexdec4, corr=F)
 226 
 227 # for printing with plotLMER.fnc, let's use the
 228 # uncentered variables.
 229 lmer.lexdec4b = lmer(RT ~ 1 + NativeLanguage * (
 230 	Frequency + FamilySize + SynsetCount + 
 231 	Class) + 
 232 	(1 | Subject) + (1 | Word), 
 233 	data=lexdec)
 234 p.lmer.lexdec4b = pvals.fnc(lmer.lexdec4b, 
 235 	nsim=10000, withMCMC=T)
 236 p.lmer.lexdec
 237 plotLMER.fnc(lmer.lexdec4b, pred="Frequency", 
 238 	mcmcMat=p.lmer.lexdec4b$mcmc,
 239 	intr=list("NativeLanguage", c("English", "Other"),
 240 		"end", list(c("red", "orange"), 
 241 		c(1,1))), cex=1.3, lwd=2)		
 242 
 243 ## --------------------------------------------------------
 244 # Ordinary Logistic Regression - An example
 245 ## --------------------------------------------------------
 246 library(languageR)
 247 lmer.lexdec.answer4 = lmer(Correct == "correct" ~ 
 248 	1 + cNativeEnglish * (
 249 	cFrequency + cFamilySize +	cSynsetCount + 
 250 	cPlant) + 
 251 	(1 + Frequency | Subject) + (1 | Word), 
 252 	data=lexdec, family="binomial")
 253 
 254 lmer.lexdec.answer4b = lmer(Correct == "correct" ~ 
 255 	1 + NativeLanguage * (
 256 	Frequency + FamilySize + SynsetCount + 
 257 	Class) + 
 258 	(1 + Frequency | Subject) + (1 | Word), 
 259 	data=lexdec, family="binomial")
 260 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency", 
 261 	intr=list("NativeLanguage", c("English", "Other"),
 262 		"end", list(c("red", "orange"), 
 263 		c(1,1))), cex=1.3, lwd=2, fun=I)	
 264 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency", 
 265 	intr=list("NativeLanguage", c("English", "Other"),
 266 		"end", list(c("red", "orange"), 
 267 		c(1,1))), cex=1.3, lwd=2)		
 268 
 269 
 270 
 271 
 272 
 273 setwd("C:\\Documents and Settings\\florian\\My Documents\\_Papers\\GillespieJaeger_OCP\\data\\data")
 274 
 275 d= as.data.frame(read.csv(file="results.txt", sep="\t",
 276 	header=F))
 277 names(d) = c('Sequence', 'CountThat', 'CountNoThat')
 278 
 279 d$ProportionThat = d$CountThat / (d$CountThat + d$CountNoThat)
 280 d$HeadNoun = as.factor(sub("^([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
 281 d$RcDet = as.factor(sub("^[a-zA-Z]+\\sthat\\s([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
 282 d$RcNoun = as.factor(sub("^.*\\s([a-zA-Z]+)\\s[a-zA-Z]+$", "\\1", d$Sequence))
 283 d$RcVerb = as.factor(sub("^.*\\s([a-zA-Z]+)$", "\\1", d$Sequence))
 284 
 285 poisson.ocp1 = glm(CountThat ~ RcDet, data= d, family= "poisson")
 286 summary(poisson.ocp1)
 287 
 288 d.that = d
 289 d.that$That = 1
 290 d.nothat = d
 291 d.nothat$That = 0
 292 dd = rbind(d.that, d.nothat)
 293 rm(d.that, d.nothat)
 294 
 295 dd$Weight = ifelse(dd$That == 1, dd$CountThat, dd$CountNoThat)
 296 table(dd$That, is.na(dd$Weight))
 297 dd = subset(dd, !is.na(CountThat) & !is.na(CountNoThat))
 298 summary(dd$That == 1)
 299 dd$CountThat = NULL
 300 dd$CountNoThat = NULL
 301 dd$HeadNounCount = table(dd$HeadNoun)[as.character(dd$HeadNoun)]
 302 dd$RcNounCount = table(dd$RcNoun)[as.character(dd$RcNoun)]
 303 dd$RcVerbCount = table(dd$RcVerb)[as.character(dd$RcVerb)]
 304 
 305 library(Design)
 306 dd.dist = datadist(dd)
 307 options(datadist = 'dd.dist')
 308 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weights= Weight, 
 309 	subset= RcDet %in% c('this','that'))
 310 
 311 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weight= Weight)
 312 plot(logistic.ocp1, xlab= "Determiner at RC onset", 
 313 	ylab="log-odds of relativizer")
 314 plot(logistic.ocp1, xlab= "Determiner at RC onset", 
 315 	ylab="probability of relativizer", fun= plogis)
 316 
 317 library(lme4)
 318 dd.sub = subset(dd, HeadNounCount > 10 &
 319 	RcNounCount > 10)
 320 nrow(dd.sub)
 321 lmer.ocp1 = lmer(That ~ RcDet + (1 + RcDet | RcNoun) + (1 | HeadNoun), data= dd.sub, 
 322 	subset= RcDet %in% c('this','that'),
 323 	weights= Weight, family="binomial")
Attachment 'Code-Rochester-IntroToRegressionModels.R'

Attached Files