Attachment 'LSA13-AuxLecture5-TimeSeriesData-eye-tracking-analysis.R'
Download 1 ## -----------------
2 # functions
3 ## -----------------
4 myCenter = function(x) x - mean(x, na.rm=T)
5 emplog = function(p, totalN) {
6 y = p*totalN
7 return(log( (y+.5)/ (totalN-y + .5) ))
8 }
9 emplogweight = function(p, totalN) {
10 y = p*totalN
11 return(1 / ( 1 / (y+.5) + 1 / (totalN-y + .5) ))
12 }
13
14
15 ## -----------------
16 # load data
17 ## -----------------
18 setwd("C:\\Users\\tiflo\\Documents\\My LabMeetings, Tutorials, & Teaching\\LSA13\\latex\\scripts")
19 d = read.csv(file = "eye-tracking-sample.csv")
20 str(d)
21
22 ## -----------------
23 # analysis 1: ANOVA (collapsing over time)
24 ## -----------------
25 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
26 Subj = d$Subj,
27 CondWordFrequency = d$CondWordFrequency,
28 CondCompetitors = d$CondCompetitors
29 ),
30 FUN = mean
31 )
32 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
33 str(d.agg)
34
35 # visualize
36 with(d.agg, interaction.plot(CondCompetitors, CondWordFrequency, LooksToTarget,
37 type="b",
38 col=c(1:2),
39 legend = F,
40 # leg.bty="o", leg.bg="beige",
41 lwd=2,
42 pch=c(18,24),
43 ylim= c(0,1),
44 xlab="Number of competitors",
45 ylab="Proportion looks to target",
46 main="Interaction Plot")
47 )
48 legend(1.5, 1, title= "Word Frequency",
49 legend = c("high", "low"),
50 col=c(1:2),
51 bty="o",
52 bg="beige",
53 lwd=2,
54 pch=c(18,24)
55 )
56
57 # see: http://www.statmethods.net/stats/anova.html
58 m.F1 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors +
59 Error(Subj/(CondWordFrequency * CondCompetitors)),
60 data = d.agg)
61 summary(m.F1)
62
63 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
64 Item = d$Item,
65 CondWordFrequency = d$CondWordFrequency,
66 CondCompetitors = d$CondCompetitors
67 ),
68 FUN = mean
69 )
70 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
71 str(d.agg)
72
73 m.F2 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors +
74 Error(Item/(CondWordFrequency * CondCompetitors)),
75 data = d.agg)
76 summary(m.F2)
77
78 ## -----------------
79 # analysis 2: ANOVA (collapsing over time) - arcsine
80 ## -----------------
81 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
82 Subj = d$Subj,
83 CondWordFrequency = d$CondWordFrequency,
84 CondCompetitors = d$CondCompetitors
85 ),
86 FUN = function(x) { asin(sqrt(mean(x))) }
87 )
88 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
89 str(d.agg)
90
91 m.F1 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors +
92 Error(Subj/(CondWordFrequency * CondCompetitors)),
93 data = d.agg)
94 summary(m.F1)
95
96 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
97 Item = d$Item,
98 CondWordFrequency = d$CondWordFrequency,
99 CondCompetitors = d$CondCompetitors
100 ),
101 FUN = function(x) { asin(sqrt(mean(x))) }
102 )
103 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
104 str(d.agg)
105
106 m.F2 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors +
107 Error(Item/(CondWordFrequency * CondCompetitors)),
108 data = d.agg)
109 summary(m.F2)
110
111
112 ## -----------------
113 # analysis 3: mixed linear model
114 #
115 # note: aggregation over subject and items combined
116 ## -----------------
117 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
118 Subj = d$Subj,
119 Item = d$Item,
120 CondWordFrequency = d$CondWordFrequency,
121 CondCompetitors = d$CondCompetitors
122 ),
123 FUN = mean
124 )
125 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
126
127 d.agg$cCondWordFrequencyHigh = myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
128 d.agg$cCondCompetitorsTwo = myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
129
130 # model with full random effect structure
131 library(lme4)
132 m.full = lmer(LooksToTarget ~
133 cCondWordFrequencyHigh * cCondCompetitorsTwo +
134 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Subj) +
135 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item),
136 d.agg
137 )
138 m.full
139
140
141 ## -----------------
142 # analysis 5: empirical logit
143 #
144 # note: aggregation over subject and items combined
145 ## -----------------
146 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
147 Subj = d$Subj,
148 Item = d$Item,
149 CondWordFrequency = d$CondWordFrequency,
150 CondCompetitors = d$CondCompetitors
151 ),
152 FUN = mean
153 )
154 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
155
156 TotalLooks = aggregate(d[,c('LooksToTarget')], by= list(
157 Subj = d$Subj,
158 Item = d$Item,
159 CondWordFrequency = d$CondWordFrequency,
160 CondCompetitors = d$CondCompetitors
161 ),
162 FUN = length
163 )
164
165 d.agg$cCondWordFrequencyHigh =
166 myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
167 d.agg$cCondCompetitorsTwo =
168 myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
169
170 # model with full random effect structure
171 m.full = lmer(emplog(LooksToTarget, TotalLooks$x) ~
172 cCondWordFrequencyHigh * cCondCompetitorsTwo +
173 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Subj) +
174 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item),
175 d.agg,
176 weight = emplogweight(LooksToTarget, TotalLooks$x)
177 )
178 m.full
179 # deviance 1191ish
180
181 # model with only random intercepts
182 m.simple = lmer(emplog(LooksToTarget, TotalLooks$x) ~
183 cCondWordFrequencyHigh * cCondCompetitorsTwo +
184 (1 | Subj) +
185 (1 | Item),
186 d.agg,
187 weight = emplogweight(LooksToTarget, TotalLooks$x)
188 )
189 m.simple
190 anova(m.full,m.simple)
191 # deviance 1209, so there is room for improvement beyond the intercepts
192
193 # model with full random effect structure for item
194 m.full.item.1 = lmer(emplog(LooksToTarget, TotalLooks$x) ~
195 cCondWordFrequencyHigh * cCondCompetitorsTwo +
196 (1 + cCondWordFrequencyHigh + cCondCompetitorsTwo | Subj) +
197 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item),
198 d.agg,
199 weight = emplogweight(LooksToTarget, TotalLooks$x)
200 )
201
202 m.full.item.2 = lmer(emplog(LooksToTarget, TotalLooks$x) ~
203 cCondWordFrequencyHigh * cCondCompetitorsTwo +
204 (1 + cCondWordFrequencyHigh | Subj) +
205 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item),
206 d.agg,
207 weight = emplogweight(LooksToTarget, TotalLooks$x)
208 )
209
210 m.full.item.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~
211 cCondWordFrequencyHigh * cCondCompetitorsTwo +
212 (1 + cCondCompetitorsTwo | Subj) +
213 (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item),
214 d.agg,
215 weight = emplogweight(LooksToTarget, TotalLooks$x)
216 )
217
218 # comparison of the above models in terms of their deviance suggests
219 # that the only justified random slope for subjects is one for
220 # competitors. The resulting model is just as good as the full model.
221
222 m.1.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~
223 cCondWordFrequencyHigh * cCondCompetitorsTwo +
224 (1 + cCondCompetitorsTwo | Subj) +
225 (1 + cCondWordFrequencyHigh + cCondCompetitorsTwo | Item),
226 d.agg,
227 weight = emplogweight(LooksToTarget, TotalLooks$x)
228 )
229 anova(m.full.item.3, m.1.3)
230
231
232 m.2.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~
233 cCondWordFrequencyHigh * cCondCompetitorsTwo +
234 (1 + cCondCompetitorsTwo | Subj) +
235 (1 + cCondCompetitorsTwo | Item),
236 d.agg,
237 weight = emplogweight(LooksToTarget, TotalLooks$x)
238 )
239 anova(m.full.item.3, m.2.3, m.1.3)
240
241 m.3.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~
242 cCondWordFrequencyHigh * cCondCompetitorsTwo +
243 (1 + cCondCompetitorsTwo | Subj) +
244 (1 + cCondWordFrequencyHigh | Item),
245 d.agg,
246 weight = emplogweight(LooksToTarget, TotalLooks$x)
247 )
248 anova(m.full.item.3, m.3.3, m.1.3)
249
250 # m.1.3 seems the best model
251
252
253 ## -----------------
254 # analysis 4: logit
255 #
256 # note: no aggregation. This analysis would yield inflated confidence.
257 # It is hence anti-consersvative
258 ## -----------------
259
260
261 d.agg$cCondWordFrequencyHigh =
262 myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
263 d.agg$cCondCompetitorsTwo =
264 myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
265
266 # full model (do not run, because it takes a long time to fit
267 # since the data were artificial created in a way that means
268 # that the full model is unjustified
269 # m.full = lmer(LooksToTarget ~ CondWordFrequency * CondCompetitors +
270 # (1 + CondWordFrequency * CondCompetitors | Subj) +
271 # (1 + CondWordFrequency * CondCompetitors | Item),
272 # data = d,
273 # family = "binomial"
274 # )
275 # m.full
276 #
277 # Deviance: 39968
278 # save(m.full, file = "eye-tracking-full-model.Rdata", compress=T)
279
280
281 # reducing random effects
282 m.simple = glmer(LooksToTarget ~ cCondWordFrequencyHigh * cCondCompetitorsTwo +
283 (1 | Subj) +
284 (1 | Item),
285 data = d,
286 family = "binomial"
287 )
288 m.simple
289
290
291
292
293 ## -----------------
294 # Now let's take time into consideration:
295 #
296 # There are two common ways to analyze this type of
297 # binomial time series data:
298 #
299 # 1) weighted linear regression over empirical logits
300 # 2) binomial logit regression
301 #
302 # Binned data (for Barr type emp. logit analysis)
303 ## -----------------
304
305 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
306 Subj = d$Subj,
307 Item = d$Item,
308 CondWordFrequency = d$CondWordFrequency,
309 CondCompetitors = d$CondCompetitors,
310 Bin = d$Bin
311 ),
312 FUN = mean,
313 nfrequency = T
314 )
315 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
316 d.agg$cBin = myCenter(d.agg$Bin)
317
318 # plot
319 library(ggplot2)
320 library(rms)
321 ggplot(d.agg, aes(x=Bin,
322 y= LooksToTarget,
323 col= as.factor(CondWordFrequency),
324 linetype=as.factor(CondCompetitors)
325 )
326 ) +
327 geom_smooth(size=1) +
328 geom_smooth(method=lm,
329 formula= y ~ pol(x, 2),
330 family="binomial",
331 size = 2) +
332 xlab("Time bin") +
333 ylab("Proportion of looks to the target") +
334 scale_color_discrete("Word Frequency") +
335 scale_linetype_discrete("Competitors")
336
337 ggplot(d.agg, aes(x=Bin,
338 y= LooksToTarget,
339 col= as.factor(CondWordFrequency),
340 shape= as.factor(CondCompetitors),
341 linetype=as.factor(CondCompetitors)
342 )
343 ) +
344 geom_point(alpha=.05) +
345 geom_smooth(size=1) +
346 geom_smooth(method=lm,
347 formula= y ~ pol(x, 2),
348 family="binomial",
349 size = 2) +
350 xlab("Time bin") +
351 ylab("Proportion of looks to the target") +
352 scale_color_discrete("Word Frequency") +
353 scale_linetype_discrete("Competitors") +
354 scale_shape_discrete("Competitors")
355
356
357
358 d.agg$cCondWordFrequencyHigh =
359 myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
360 d.agg$cCondCompetitorsTwo =
361 myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
362 lmer(emplog(LooksToTarget, TotalLooks$x) ~
363 cCondWordFrequencyHigh * cCondCompetitorsTwo *
364 cBin +
365 (1 | Subj) + (1 | Item),
366 d.agg,
367 weight = emplogweight(LooksToTarget, TotalLooks$x)
368 )
369
370 # with quadratic effect of bin
371 lmer(emplog(LooksToTarget, TotalLooks$x) ~
372 cCondWordFrequencyHigh * cCondCompetitorsTwo *
373 pol(cBin, 2) +
374 (1 | Subj) + (1 | Item),
375 d.agg,
376 weight = emplogweight(LooksToTarget, TotalLooks$x)
377 )
378
379
380
381
382
383
384 # 2) logit
385 d$cCondWordFrequencyHigh =
386 myCenter(ifelse(d$CondWordFrequency == "high", 1, 0))
387 d$cCondCompetitorsTwo =
388 myCenter(ifelse(d$CondCompetitors == "two", 1, 0))
389
390
391 # with quadratic effect of time
392 glmer(LooksToTarget ~
393 cCondWordFrequencyHigh * cCondCompetitorsTwo *
394 pol(cTime, 2) +
395 (1 | Subj) + (1 | Item),
396 d, family = "binomial"
397 )
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.