Attachment 'Day1.R'
Download
Toggle line numbers
1 ###########################################################
2 #
3 # Below, empty lines correspond to slide breaks in the
4 # presentation. I've also marked the major sections breaks.
5 #
6 # For questions, contact Florian Jaeger,
7 # tiflo@bcs.rochester.edu
8 #
9 # Also consider, subscribing to R-lang, an email list for
10 # language researchers working in R
11 # (http://pidgin.ucsd.edu/mailman/listinfo/r-lang)
12 #
13 # you can get help on any command by typing ?command into
14 # the R GUI.
15 #
16 # Lines of the script are executed with CTRL-R (Windows) or
17 # APPLE-ENTER (Mac OS)
18 #
19 ###########################################################
20
21 ## --------------------------------------------------------
22 # Functions (run first)
23 ## --------------------------------------------------------
24 center <- function(x) { return(x - mean(x, na.rm=T)) }
25
26 ## --------------------------------------------------------
27 # Change to your working directory
28 ## --------------------------------------------------------
29 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\PennState\\figs")
30
31 ## --------------------------------------------------------
32 # Lexical decision time data
33 ## --------------------------------------------------------
34 library(languageR)
35 head(lexdec[,c(1,2,5,10,11)])
36
37 ?lexdec
38 summary(lexdec)
39 str(lexdec)
40 names(lexdec)
41
42
43 ## --------------------------------------------------------
44 # Ordinary Linear Regression - An example
45 ## --------------------------------------------------------
46 l = glm(RT ~ 1 + Frequency, data=lexdec,
47 family="gaussian")
48 summary(l)
49 sqrt(summary(l)[["dispersion"]])
50
51 ## --------------------------------------------------------
52 # Ordinary Linear Regression - Geometric intuitions
53 ## --------------------------------------------------------
54 library(languageR)
55 library(Design)
56
57 l.lexdec0 = lm(RT ~ 1, data=lexdec)
58 summary(l.lexdec0)
59 plot(x=lexdec$RT,
60 xlab="Case Index",
61 ylab="Response latency (in log-transformed msecs)",
62 main="Predicting Lexical Decision RTs")
63 abline(coef(l.lexdec0)[1], 0, col="orange", lwd=3)
64
65 l.lexdec1 = lm(RT ~ 1 + Frequency, data=lexdec)
66 plot(x=lexdec$Frequency, y=lexdec$RT,
67 xlab="Word Frequency (log-transformed)",
68 ylab="Response latency (in log-transformed msecs)",
69 main="Predicting Lexical Decision RTs")
70 abline(coef(l.lexdec1), col="orange", lwd=3)
71
72 plot(x=exp(lexdec$Frequency), y=exp(lexdec$RT),
73 xlab="Word Frequency",
74 ylab="Response latency (in msecs)",
75 main="Predicting Lexical Decision RTs")
76 curve(exp(coef(l.lexdec1)[1] +
77 coef(l.lexdec1)[2]*log(x)),
78 col="orange", lwd=3, add=T)
79 # see also the plot.Design function
80 # for which you have to fit the model with ols()
81
82 library(scatterplot3d)
83 l.lexdec2 = lm(RT ~ 1 + Frequency + FamilySize, data=lexdec)
84 summary(l.lexdec2)
85
86 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize,
87 z=lexdec$RT,
88 xlab="Word Frequency (log-transformed)",
89 ylab="Number of morph. family members (log-transformed)",
90 zlab="Response latency (in log-transformed msecs)",
91 main="Predicting Lexical Decision RTs")
92 s$plane3d(l.lexdec2, col= "orange", lwd=2, lty.box = "solid")
93
94 l.lexdec3 = lm(RT ~ 1 + Frequency + FamilySize +
95 NativeLanguage, data=lexdec)
96 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize,
97 z=lexdec$RT,
98 xlab="Word Frequency (log-transformed)",
99 ylab="Number of morph. family members (log-transformed)",
100 zlab="Response latency (in log-transformed msecs)",
101 main="Predicting Lexical Decision RTs",
102 sub="Native Speakers (red) and\nNon-Native Speakers (blue)")
103 s$plane3d(coef(l.lexdec3)[1],
104 coef(l.lexdec3)[2], coef(l.lexdec3)[3],
105 col= "red", lwd=2,
106 lty.box = "solid")
107 s$plane3d(coef(l.lexdec3)[1] + coef(l.lexdec3)[4],
108 coef(l.lexdec3)[2], coef(l.lexdec3)[3],
109 col= "orange", lwd=2,
110 lty.box = "solid")
111
112 l.lexdec4 = lm(RT ~ 1 + Frequency + FamilySize +
113 NativeLanguage + NativeLanguage:Frequency,
114 data=lexdec)
115 summary(l.lexdec4)
116 s = scatterplot3d(x=lexdec$Frequency, y=lexdec$FamilySize,
117 z=lexdec$RT,
118 xlab="Word Frequency (log-transformed)",
119 ylab="Number of morph. family members (log-transformed)",
120 zlab="Response latency (in log-transformed msecs)",
121 main="Predicting Lexical Decision RTs",
122 sub="Interaction with Native Speakers (red) and\nNon-Native Speakers (blue)")
123 s$plane3d(coef(l.lexdec4)[1],
124 coef(l.lexdec4)[2], coef(l.lexdec4)[3],
125 col= "red", lwd=2,
126 lty.box = "solid")
127 s$plane3d(coef(l.lexdec4)[1] + coef(l.lexdec4)[4],
128 coef(l.lexdec4)[2] + coef(l.lexdec4)[5],
129 coef(l.lexdec4)[3],
130 col= "orange", lwd=2,
131 lty.box = "solid")
132
133 ## --------------------------------------------------------
134 # Random Effects
135 ## --------------------------------------------------------
136 setwd("C:\\Documents and Settings\\florian\\My Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\PennState\\figs")
137 library(languageR)
138
139 o = exp(as.vector(tapply(lexdec$RT, lexdec$Subject, mean)))
140 plot(o, type = "n", xlab = "Subject Index",
141 ylab = "Response latency in msecs",
142 main = "Subject as random effect")
143 text(o, as.character(unique(lexdec$Subject)), col = "blue")
144 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
145
146 lmer.lexdec0 = lmer(RT ~ 1 + Frequency +
147 (1 | Subject), data=lexdec)
148
149 cor(fitted(lmer.lexdec0), lexdec$RT)^2
150
151 pvals.fnc(lmer.lexdec0, nsim = 10000)
152
153 ranef(lmer.lexdec0)
154
155 p = exp(as.vector(unlist(coef(lme.lexdec0)$Subject)))
156 text(p, as.character(unique(lexdec$Subject)), col = "red")
157 legend(x=2, y=850, legend=c("Predicted", "Observed"),
158 col=c("blue", "red"), pch=1)
159
160 lmer.lexdec1 = lmer(RT ~ 1 + (1 | Subject) + (1 | Word),
161 data=lexdec)
162 o = exp(as.vector(tapply(lexdec$RT, lexdec$Word, mean, na.rm=T)))
163 p = exp(as.vector(unlist(coef(lmer.lexdec1)$Word)))
164 plot(o, type = "n", xlab = "Word Index",
165 ylab = "Response latency in msecs",
166 main = "Word as random effect")
167 abline(mean(exp(lexdec$RT)),0, col = "gray", lwd=2)
168 text(o, as.character(unique(lexdec$Word)), col = "blue")
169 segments(x0=1:length(o), y0=o, x1=1:length(p),
170 y1=p, col="red", lwd=2)
171 points(p, pch=ifelse(p > o, "^", "v"), col="red", lwd=2)
172
173 lmer.lexdec2 = lmer(RT ~ 1 + Frequency +
174 (1 | Subject) + (0 + Frequency | Subject) +
175 (1 | Word),
176 data=lexdec)
177
178 ranef(lmer.lexdec2)
179
180 lmer.lexdec3 = lmer(RT ~ 1 + Frequency +
181 (1 + Frequency | Subject) + (1 | Word),
182 data=lexdec)
183
184 plot(ranef(lmer.lexdec3)$Subject,
185 main="Random Effect Correlation")
186
187
188 # for all models
189 dotplot(ranef(lmer.lexdec3))
190 qqmath(ranef(lmer.lexdec3))
191
192 # currently only for models with relatively
193 # simple random effects structures
194 dotplot(ranef(lmer.lexdec3, postVar=TRUE))
195 dotplot(ranef(lmer.lexdec3, postVar=TRUE),
196 scales = list(x =
197 list(relation = 'free')))[["Subject"]]
198
199 qqmath(ranef(lmer.lexdec3, postVar=TRUE))
200
201 # To see what the postVar=TRUE option does, see ?ranef, or look at the
202 # code returned by getMethod(ranef, "mer"). There are also some examples
203 # on the ranef help page.
204
205 anova(lmer.lexdec3, lmer.lexdec2)
206 anova(lmer.lexdec3, lmer.lexdec3.simple =
207 lmer(RT ~ 1 + Frequency +
208 (1 | Subject) + (1 | Word),
209 data=lexdec)
210 )
211
212 # center variables
213 lexdec$cNativeEnglish <- center(ifelse(
214 lexdec$NativeLanguage == "English", 1,0))
215 lexdec$cFrequency <- center(lexdec$Frequency)
216 lexdec$cFamilySize <- center(lexdec$FamilySize)
217 lexdec$cSynsetCount <- center(lexdec$SynsetCount)
218 lexdec$cPlant <- center(ifelse(
219 lexdec$Class == "plant", 1,0))
220
221 lmer.lexdec4 = lmer(RT ~ 1 + cNativeEnglish * (
222 cFrequency + cFamilySize + cSynsetCount +
223 cPlant) +
224 (1 + Frequency | Subject) + (1 | Word),
225 data=lexdec)
226 print(lmer.lexdec4, corr=F)
227
228 # for printing with plotLMER.fnc, let's use the
229 # uncentered variables.
230 lmer.lexdec4b = lmer(RT ~ 1 + NativeLanguage * (
231 Frequency + FamilySize + SynsetCount +
232 Class) +
233 (1 | Subject) + (1 | Word),
234 data=lexdec)
235 p.lmer.lexdec4b = pvals.fnc(lmer.lexdec4b,
236 nsim=10000, withMCMC=T)
237 p.lmer.lexdec
238 plotLMER.fnc(lmer.lexdec4b, pred="Frequency",
239 mcmcMat=p.lmer.lexdec4b$mcmc,
240 intr=list("NativeLanguage", c("English", "Other"),
241 "end", list(c("red", "orange"),
242 c(1,1))), cex=1.3, lwd=2)
243
244 ## --------------------------------------------------------
245 # Ordinary Logistic Regression - An example
246 ## --------------------------------------------------------
247 library(languageR)
248 lmer.lexdec.answer4 = lmer(Correct == "correct" ~
249 1 + cNativeEnglish * (
250 cFrequency + cFamilySize + cSynsetCount +
251 cPlant) +
252 (1 + Frequency | Subject) + (1 | Word),
253 data=lexdec, family="binomial")
254
255 lmer.lexdec.answer4b = lmer(Correct == "correct" ~
256 1 + NativeLanguage * (
257 Frequency + FamilySize + SynsetCount +
258 Class) +
259 (1 + Frequency | Subject) + (1 | Word),
260 data=lexdec, family="binomial")
261 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency",
262 intr=list("NativeLanguage", c("English", "Other"),
263 "end", list(c("red", "orange"),
264 c(1,1))), cex=1.3, lwd=2, fun=I)
265 plotLMER.fnc(lmer.lexdec.answer4b, pred="Frequency",
266 intr=list("NativeLanguage", c("English", "Other"),
267 "end", list(c("red", "orange"),
268 c(1,1))), cex=1.3, lwd=2)
269
270
271
272
273
274 setwd("C:\\Documents and Settings\\florian\\My Documents\\_Papers\\GillespieJaeger_OCP\\data\\data")
275
276 d= as.data.frame(read.csv(file="results.txt", sep="\t",
277 header=F))
278 names(d) = c('Sequence', 'CountThat', 'CountNoThat')
279
280 d$ProportionThat = d$CountThat / (d$CountThat + d$CountNoThat)
281 d$HeadNoun = as.factor(sub("^([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
282 d$RcDet = as.factor(sub("^[a-zA-Z]+\\sthat\\s([a-zA-Z]+)\\s.+$", "\\1", d$Sequence))
283 d$RcNoun = as.factor(sub("^.*\\s([a-zA-Z]+)\\s[a-zA-Z]+$", "\\1", d$Sequence))
284 d$RcVerb = as.factor(sub("^.*\\s([a-zA-Z]+)$", "\\1", d$Sequence))
285
286 poisson.ocp1 = glm(CountThat ~ RcDet, data= d, family= "poisson")
287 summary(poisson.ocp1)
288
289 d.that = d
290 d.that$That = 1
291 d.nothat = d
292 d.nothat$That = 0
293 dd = rbind(d.that, d.nothat)
294 rm(d.that, d.nothat)
295
296 dd$Weight = ifelse(dd$That == 1, dd$CountThat, dd$CountNoThat)
297 table(dd$That, is.na(dd$Weight))
298 dd = subset(dd, !is.na(CountThat) & !is.na(CountNoThat))
299 summary(dd$That == 1)
300 dd$CountThat = NULL
301 dd$CountNoThat = NULL
302 dd$HeadNounCount = table(dd$HeadNoun)[as.character(dd$HeadNoun)]
303 dd$RcNounCount = table(dd$RcNoun)[as.character(dd$RcNoun)]
304 dd$RcVerbCount = table(dd$RcVerb)[as.character(dd$RcVerb)]
305
306 library(Design)
307 dd.dist = datadist(dd)
308 options(datadist = 'dd.dist')
309 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weights= Weight,
310 subset= RcDet %in% c('this','that'))
311
312 logistic.ocp1 = lrm(That ~ RcDet, data= dd, weight= Weight)
313 plot(logistic.ocp1, xlab= "Determiner at RC onset",
314 ylab="log-odds of relativizer")
315 plot(logistic.ocp1, xlab= "Determiner at RC onset",
316 ylab="probability of relativizer", fun= plogis)
317
318 library(lme4)
319 dd.sub = subset(dd, HeadNounCount > 10 &
320 RcNounCount > 10)
321 nrow(dd.sub)
322 lmer.ocp1 = lmer(That ~ RcDet + (1 + RcDet | RcNoun) + (1 | HeadNoun), data= dd.sub,
323 subset= RcDet %in% c('this','that'),
324 weights= Weight, family="binomial")
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.