Attachment 'Code-Rochester-IntroToRegressionModels.R'
Download 1 ###########################################################
2 #
3 # Below, empty lines correspond to slide breaks in the
4 # presentation. I've also marked the major sections breaks.
5 #
6 # For questions, contact Florian Jaeger,
7 # tiflo@bcs.rochester.edu
8 #
9 # Also consider, subscribing to R-lang, an email list for
10 # language researchers working in R
11 # (http://pidgin.ucsd.edu/mailman/listinfo/r-lang)
12 #
13 # you can get help on any command by typing ?command into
14 # the R GUI.
15 #
16 # Lines of the script are executed with CTRL-R (Windows) or
17 # APPLE-ENTER (Mac OS)
18 #
19 ###########################################################
20
21 ## --------------------------------------------------------
22 # Functions (run first)
23 ## --------------------------------------------------------
24 center <- function(x) { return(x - mean(x, na.rm=T)) }
25
26 ## --------------------------------------------------------
27 # Change to your working directory
28 ## --------------------------------------------------------
29 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\McGill10\\figs")
30
31 ## --------------------------------------------------------
32 # Lexical decision time data
33 ## --------------------------------------------------------
34 library(languageR)
35 head(lexdec[,c(1,2,5,10,11)])
36
37 ?lexdec
38 summary(lexdec)
39 str(lexdec)
40 names(lexdec)
41
42
43 ## --------------------------------------------------------
44 # Ordinary Linear Regression - An example
45 ## --------------------------------------------------------
46 l = glm(RT ~ 1 + Frequency, data=lexdec,
47 family="gaussian")
48 summary(l)
49 sqrt(summary(l)[["dispersion"]])
50
51 ## --------------------------------------------------------
52 # Ordinary Linear Regression - Geometric intuitions
53 ## --------------------------------------------------------
54 library(languageR)
55 library(Design)
56
57 l.lexdec0 = lm(RT ~ 1, data=lexdec)
58 summary(l.lexdec0)
59 plot(x=lexdec$RT,
60 xlab="Case Index",
61 ylab="Response latency (in log-transformed msecs)",
62 main="Predicting Lexical Decision RTs")
63 abline(coef(l.lexdec0)[1], 0, col="orange", lwd=3)
64
65 l.lexdec1 = lm(RT ~ 1 + Frequency, data=lexdec)
66 plot(x=lexdec$Frequency, y=lexdec$RT,
67 xlab="Word Frequency (log-transformed)",
68 ylab="Response latency (in log-transformed msecs)",
69 main="Predicting Lexical Decision RTs")
70 abline(coef(l.lexdec1), col="orange", lwd=3)
71
72 plot(x=exp(lexdec$Frequency), y=exp(lexdec$RT),
73 xlab="Word Frequency",
74 ylab="Response latency (in msecs)",
75 main="Predicting Lexical Decision RTs")
76 curve(exp(coef(l.lexdec1)[1] +
77 coef(l.lexdec1)[2]*log(x)),
78 col="orange", lwd=3, add=T)
79 # see also the plot.Design function
80 # for which you have to fit the model with ols()
81
82 library(scatterplot3d)
83 l.lexdec2 = lm(RT ~ 1 + Frequency + FamilySize, data=lexdec)
84 summary(l.lexdec2)
85
86 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize,
87 z=lexdec$RT,
88 xlab="Word Frequency (log-transformed)",
89 ylab="Number of morph. family members (log-transformed)",
90 zlab="Response latency (in log-transformed msecs)",
91 main="Predicting Lexical Decision RTs")
92 s$plane3d(l.lexdec2, col= "orange", lwd=2, lty.box = "solid")
93
94 l.lexdec3 = lm(RT ~ 1 + Frequency + FamilySize +
95 NativeLanguage, data=lexdec)
96 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize,
97 z=lexdec$RT,
98 xlab="Word Frequency (log-transformed)",
99 ylab="Number of morph. family members (log-transformed)",
100 zlab="Response latency (in log-transformed msecs)",
101 main="Predicting Lexical Decision RTs",
102 sub="Native Speakers (red) and\nNon-Native Speakers (orange)")
103 s$plane3d(coef(l.lexdec3)[1],
104 coef(l.lexdec3)[2], coef(l.lexdec3)[3],
105 col= "red", lwd=2,
106 lty.box = "solid")
107 s$plane3d(coef(l.lexdec3)[1] + coef(l.lexdec3)[4],
108 coef(l.lexdec3)[2], coef(l.lexdec3)[3],
109 col= "orange", lwd=2,
110 lty.box = "solid")
111
112 l.lexdec4 = lm(RT ~ 1 + Frequency + FamilySize +
113 NativeLanguage + NativeLanguage:Frequency,
114 data=lexdec)
115 summary(l.lexdec4)
116 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize,
117 z=lexdec$RT,
118 xlab="Word Frequency (log-transformed)",
119 ylab="Number of morph. family members (log-transformed)",
120 zlab="Response latency (in log-transformed msecs)",
121 main="Predicting Lexical Decision RTs",
122 sub="Interaction with Native Speakers (red) and\nNon-Native Speakers (orange)")
123 s$plane3d(coef(l.lexdec4)[1],
124 coef(l.lexdec4)[2], coef(l.lexdec4)[3],
125 col= "red", lwd=2,
126 lty.box = "solid")
127 s$plane3d(coef(l.lexdec4)[1] + coef(l.lexdec4)[4],
128 coef(l.lexdec4)[2] + coef(l.lexdec4)[5],
129 coef(l.lexdec4)[3],
130 col= "orange", lwd=2,
131 lty.box = "solid")
132
133 ## --------------------------------------------------------
134 # Random Effects
135 ## --------------------------------------------------------
136 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\PennState\\figs")
137 library(languageR)
138
139 o = exp(as.vector(tapply(lexdec$RT, lexdec$Subject, mean)))
140 plot(o, type = "n", xlab = "Subject Index",
141 ylab = "Response latency in msecs",
142 main = "Subject as random effect")
143 text(o, as.character(unique(lexdec$Subject)), col = "blue")
144 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
145
146 lmer.lexdec0 = lmer(RT ~ 1 + Frequency +
147 (1 | Subject), data=lexdec)
148 cor(fitted(lmer.lexdec0), lexdec$RT)^2
149 pvals.fnc(lmer.lexdec0, nsim = 10000)
150 ranef(lmer.lexdec0)
151
152 lmer.lexdec0 = lmer(RT ~ 1 + (1 | Subject), data=lexdec)
153
154 p = exp(as.vector(unlist(coef(lmer.lexdec0)$Subject)))
155 text(p, as.character(unique(lexdec$Subject)), col = "red")
156 legend(x=2, y=850, legend=c("Observed","Predicted"),
157 col=c("blue", "red"), pch=1)
158
159 lmer.lexdec1 = lmer(RT ~ 1 + (1 | Subject) + (1 | Word),
160 data=lexdec)
161 o = exp(as.vector(tapply(lexdec$RT, lexdec$Word, mean, na.rm=T)))
162 p = exp(as.vector(unlist(coef(lmer.lexdec1)$Word)))
163 plot(o, type = "n", xlab = "Word Index",
164 ylab = "Response latency in msecs",
165 main = "Word as random effect")
166 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
167 text(o, as.character(unique(lexdec$Word)), col = "blue")
168 segments(x0=1:length(o), y0=o, x1=1:length(p),
169 y1=p, col="red", lwd=2)
170 points(p, pch=ifelse(p > o, "^", "v"), col="red", lwd=2)
171
172 lmer.lexdec2 = lmer(RT ~ 1 + Frequency +
173 (1 | Subject) + (0 + Frequency | Subject) +
174 (1 | Word),
175 data=lexdec)
176
177 ranef(lmer.lexdec2)
178
179 lmer.lexdec3 = lmer(RT ~ 1 + Frequency +
180 (1 + Frequency | Subject) + (1 | Word),
181 data=lexdec)
182
183 plot(ranef(lmer.lexdec3)$Subject,
184 main="Random Effect Correlation")
185
186
187 # for all models
188 dotplot(ranef(lmer.lexdec3))
189 qqmath(ranef(lmer.lexdec3))
190
191 # currently only for models with relatively
192 # simple random effects structures
193 dotplot(ranef(lmer.lexdec3, postVar=TRUE))
194 dotplot(ranef(lmer.lexdec3, postVar=TRUE),
195 scales = list(x =
196 list(relation = 'free')))[["Subject"]]
197
198 qqmath(ranef(lmer.lexdec3, postVar=TRUE))
199
200 # To see what the postVar=TRUE option does, see ?ranef, or look at the
201 # code returned by getMethod(ranef, "mer"). There are also some examples
202 # on the ranef help page.
203
204 anova(lmer.lexdec3, lmer.lexdec2)
205 anova(lmer.lexdec3, lmer.lexdec3.simple =
206 lmer(RT ~ 1 + Frequency +
207 (1 | Subject) + (1 | Word),
208 data=lexdec)
209 )
210
211 # center variables
212 lexdec$cNativeEnglish <- center(ifelse(
213 lexdec$NativeLanguage == "English", 1,0))
214 lexdec$cFrequency <- center(lexdec$Frequency)
215 lexdec$cFamilySize <- center(lexdec$FamilySize)
216 lexdec$cSynsetCount <- center(lexdec$SynsetCount)
217 lexdec$cPlant <- center(ifelse(
218 lexdec$Class == "plant", 1,0))
219
220 lmer.lexdec4 = lmer(RT ~ 1 + cNativeEnglish * (
221 cFrequency + cFamilySize + cSynsetCount +
222 cPlant) +
223 (1 + Frequency | Subject) + (1 | Word),
224 data=lexdec)
225 print(lmer.lexdec4, corr=F)
226
227 # for printing with plotLMER.fnc, let's use the
228 # uncentered variables.
229 lmer.lexdec4b = lmer(RT ~ 1 + NativeLanguage * (
230 Frequency + FamilySize + SynsetCount +
231 Class) +
232 (1 | Subject) + (1 | Word),
233 data=lexdec)
234 p.lmer.lexdec4b = pvals.fnc(lmer.lexdec4b,
235 nsim=10000, withMCMC=T)
236 p.lmer.lexdec
237 plotLMER.fnc(lmer.lexdec4b, pred="Frequency",
238 mcmcMat=p.lmer.lexdec4b$mcmc,
239 intr=list("NativeLanguage", c("English", "Other"),
240 "end", list(c("red", "orange"),
241 c(1,1))), cex=1.3, lwd=2)
242
243 ## --------------------------------------------------------
244 # Ordinary Logistic Regression - An example
245 ## --------------------------------------------------------
246 library(languageR)
247 lmer.lexdec.answer4 = lmer(Correct == "correct" ~
248 1 + cNativeEnglish * (
249 cFrequency + cFamilySize + cSynsetCount +
250 cPlant) +
251 (1 + Frequency | Subject) + (1 | Word),
252 data=lexdec, family="binomial")
253
254 lmer.lexdec.answer4b = lmer(Correct == "correct" ~
255 1 + NativeLanguage * (
256 Frequency + FamilySize + SynsetCount +
257 Class) +
258 (1 + Frequency | Subject) + (1 | Word),
259 data=lexdec, family="binomial")
260 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency",
261 intr=list("NativeLanguage", c("English", "Other"),
262 "end", list(c("red", "orange"),
263 c(1,1))), cex=1.3, lwd=2, fun=I)
264 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency",
265 intr=list("NativeLanguage", c("English", "Other"),
266 "end", list(c("red", "orange"),
267 c(1,1))), cex=1.3, lwd=2)
268
269
270
271
272
273 setwd("C:\\Documents and Settings\\florian\\My Documents\\_Papers\\GillespieJaeger_OCP\\data\\data")
274
275 d= as.data.frame(read.csv(file="results.txt", sep="\t",
276 header=F))
277 names(d) = c('Sequence', 'CountThat', 'CountNoThat')
278
279 d$ProportionThat = d$CountThat / (d$CountThat + d$CountNoThat)
280 d$HeadNoun = as.factor(sub("^([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
281 d$RcDet = as.factor(sub("^[a-zA-Z]+\\sthat\\s([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
282 d$RcNoun = as.factor(sub("^.*\\s([a-zA-Z]+)\\s[a-zA-Z]+$", "\\1", d$Sequence))
283 d$RcVerb = as.factor(sub("^.*\\s([a-zA-Z]+)$", "\\1", d$Sequence))
284
285 poisson.ocp1 = glm(CountThat ~ RcDet, data= d, family= "poisson")
286 summary(poisson.ocp1)
287
288 d.that = d
289 d.that$That = 1
290 d.nothat = d
291 d.nothat$That = 0
292 dd = rbind(d.that, d.nothat)
293 rm(d.that, d.nothat)
294
295 dd$Weight = ifelse(dd$That == 1, dd$CountThat, dd$CountNoThat)
296 table(dd$That, is.na(dd$Weight))
297 dd = subset(dd, !is.na(CountThat) & !is.na(CountNoThat))
298 summary(dd$That == 1)
299 dd$CountThat = NULL
300 dd$CountNoThat = NULL
301 dd$HeadNounCount = table(dd$HeadNoun)[as.character(dd$HeadNoun)]
302 dd$RcNounCount = table(dd$RcNoun)[as.character(dd$RcNoun)]
303 dd$RcVerbCount = table(dd$RcVerb)[as.character(dd$RcVerb)]
304
305 library(Design)
306 dd.dist = datadist(dd)
307 options(datadist = 'dd.dist')
308 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weights= Weight,
309 subset= RcDet %in% c('this','that'))
310
311 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weight= Weight)
312 plot(logistic.ocp1, xlab= "Determiner at RC onset",
313 ylab="log-odds of relativizer")
314 plot(logistic.ocp1, xlab= "Determiner at RC onset",
315 ylab="probability of relativizer", fun= plogis)
316
317 library(lme4)
318 dd.sub = subset(dd, HeadNounCount > 10 &
319 RcNounCount > 10)
320 nrow(dd.sub)
321 lmer.ocp1 = lmer(That ~ RcDet + (1 + RcDet | RcNoun) + (1 | HeadNoun), data= dd.sub,
322 subset= RcDet %in% c('this','that'),
323 weights= Weight, family="binomial")
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.