Attachment 'attention-r-commands.R'
Download 1 # R commands for Attention-Rochester analysis
2 #
3 # Anne Pier Salverda, 02/15/2008
4 #
5 # for an example of the procedure, see attention-design.ppt
6
7 # read datafile "attention-r-data.csv"
8 attention<-read.csv(file.choose())
9
10 # look at dataset
11 # get the variables in your dataset and then look at the overall structure
12 names(attention)
13 str(attention)
14
15 # output
16 #'data.frame': 768 obs. of 23 variables:
17 # $ subject : Factor w/ 16 levels "A1","A2","A3",..: 1 1 1 1 1 1 1 1 1 1 ...
18 # subject name
19
20 # $ condition : Factor w/ 2 levels "congruent","incongruent": 2 1 2 2 1 1 2 2 2 1 ...
21 # "congruent" when target==named; "incongruent" when target!=named
22
23 # $ target : Factor w/ 96 levels "acorn","anchor",..: 44 18 54 10 67 43 77 87 84 90 ...
24 # target object
25
26 # $ named : Factor w/ 96 levels "acorn","anchor",..: 39 18 20 59 67 43 19 45 48 90 ...
27 # named object
28
29 # $ namedfreq : num 2.08 4.21 3.64 3.56 2.08 ...
30 # frequency of named object
31
32 # $ namedcdens : num 46.8 30.4 42.0 115.1 32.7 ...
33 # cohort density of named object
34
35 # $ targetpos : Factor w/ 2 levels "L","R": 2 1 1 2 1 2 2 1 2 2 ...
36 # position of target ("L"==left; "R"==right)
37
38 # $ namedpos : Factor w/ 2 levels "L","R": 1 1 2 1 1 2 1 2 1 2 ...
39 # position of named object ("L"==left; "R"==right)
40
41 # $ fixated : Factor w/ 3 levels "","L","R": 3 2 2 3 2 3 3 2 3 3 ...
42 # object fixated upon color change ("L"==left; "R"==right)
43
44 # $ preview : Factor w/ 5 levels "L","LR","NO",..: 3 3 3 3 3 3 3 3 3 1 ...
45 # objects fixated prior to color change ("L"==left; "R"==right; "NO"==NONE)
46
47 # $ trial : int 13 14 15 16 17 18 19 20 21 22 ...
48 # trial number
49
50 # $ event : Factor w/ 1 level "ESACC": 1 1 1 1 1 1 1 1 1 1 ...
51 # type of eye movement (always saccade)
52
53 # $ eye : Factor w/ 2 levels "L","R": 2 2 2 2 2 2 2 2 2 2 ...
54 # eye recorded
55
56 # $ starttime : int 2019506 2028686 2038106 2047506 2117534 2127254 2137006 2146210 2155674 2165158 ...
57 # onset of saccade (time code)
58
59 # $ endtime : int 2019542 2028726 2038146 2047542 2117574 2127290 2137050 2146254 2155714 2165190 ...
60 # offset of saccade (time code)
61
62 # $ sacduration : int 40 44 44 40 44 40 48 48 44 36 ...
63 # saccade duration (in ms)
64
65 # $ x1 : num 324 325 324 322 311 ...
66 # start of saccade; X coordinate
67
68 # $ y1 : num 252 250 270 244 240 ...
69 # start of saccade; Y coordinate
70
71 # $ x2 : num 457 162 162 457 153 ...
72 # destination of saccade; X coordinate
73
74 # $ y2 : num 229 220 246 231 211 ...
75 # destination of saccade; X coordinate
76
77 # $ amplitude : num 7.45 9.05 8.96 7.49 8.88 7.94 9.49 8.74 8.64 7.67 ...
78 # amplitude of saccade
79
80 # $ peakvelocity: int 392 361 361 401 343 419 335 342 367 407 ...
81 # peak velocity of saccade
82
83 # $ latency : int 310 361 303 454 273 286 370 355 301 337 ...
84 # saccadic launch time; delay between color change and initiation of saccade
85
86
87 # some variables don't vary (event). Let's remove them
88 attention$event <- NULL
89 # or: attention <- subset(attention, select= -event)
90
91 # get an idea of the distributions
92 summary(attention)
93
94
95 #################################################################
96 # Questions:
97 #
98 # How does the congruence between named and target object
99 # influence saccades?
100 #################################################################
101
102 # Step 1: choose a outcome variable
103 # Step 2: understand the distribution of that outcome variable
104 # Step 3: apply transforms if necessary
105 # Step 4: run regression model (choose adequate model)
106 # Step 5: interpret results
107
108 # let's look at latencies, but not all latencies are of interest
109
110 # tolerance: distance from fixation that is acceptable at onset
111 tolerance <- 22
112 attention$dfix <- round(sqrt((320-attention$x1)*(320-attention$x1)+(240-attention$y1)*(240-attention$y1)),1)
113
114 # select only valid trials
115 attention <- subset(attention, as.character(targetpos)==as.character(fixated) & dfix<tolerance)
116
117
118 # plotting two variables against each other can be a good start
119 # plot() usually automatically chooses a "good" plot style
120 # in this case, this is a box plot, cf. boxplot()
121 plot(attention$condition, attention$latency)
122 # the line is the median
123 # box indicates IQR (inter quartile range, from 25th to 75th quartile)
124 # intervals indicate 1.5 IQRs below or above IRQ
125 # data outside of that range is usually considered an outlier
126
127 library(lattice)
128 histogram(~ latency | condition, data=attention, type="density")
129 densityplot(~ latency | condition, data=attention)
130
131 # test for normality
132 shapiro.test(attention[attention$condition == "congruent",]$latency)
133 shapiro.test(attention[attention$condition == "incongruent",]$latency)
134
135
136 # there are obviously some extreme cases. should we remove them?
137 # alternatively, we could downweigh high cases by log-transforming
138 # the data. Let's try this first. After all, all outliers seem to
139 # be HIGH outliers.
140 plot(attention$condition, log(attention$latency))
141 histogram(~ log(latency) | condition, data=attention, type="density")
142 shapiro.test(log(attention[attention$condition == "congruent",]$latency))
143 shapiro.test(log(attention[attention$condition == "incongruent",]$latency))
144
145 # square root transformation
146 plot(attention$condition, sqrt(attention$latency))
147 histogram(~ sqrt(latency) | condition, data=attention, type="density")
148 shapiro.test(sqrt(attention[attention$condition == "congruent",]$latency))
149 shapiro.test(sqrt(attention[attention$condition == "incongruent",]$latency))
150
151 # What makes the reaction time distribution so non-normal?
152 # Idea: different subject have very different base-rates.
153 par(mar=c(8,4.1,0.1,.1))
154 plot(as.factor(paste(attention$subject, attention$condition)), attention$latency, las= 3, ylab="latency")
155 plot(as.factor(paste(attention$subject, attention$condition)), log(attention$latency), las= 3, ylab="log latency")
156 # see also: xyplot(log(attention$latency) ~ attention$condition | attention$subject)
157
158 # Create a centered reaction time for each subject
159 attach(attention)
160 attention$c.latency <- unlist(lapply(split(latency, subject), FUN= function(x){ as.numeric(scale(x, scale=F)) }))
161 attention$c.log.latency <- unlist(lapply(split(log(latency), subject), FUN= function(x){ as.numeric(scale(x, scale=F)) }))
162 attention$z.latency <- unlist(lapply(split(latency, subject), FUN= function(x){ as.numeric(scale(x)) }))
163 attention$z.log.latency <- unlist(lapply(split(log(latency), subject), FUN= function(x){ as.numeric(scale(x)) }))
164 detach(attention)
165
166 plot(as.factor(paste(attention$subject, attention$condition)), attention$c.latency, las= 3, ylab="latency")
167 plot(as.factor(paste(attention$subject, attention$condition)), attention$c.log.latency, las= 3, ylab="log latency")
168 histogram(~ c.latency | condition, data=attention, type="density")
169 histogram(~ c.log.latency | condition, data=attention, type="density")
170
171 ################## outliers ##########################
172 # remove outliers
173 d <- subset(attention, latency < 450 & latency > 150 & abs(z.latency) < 2 )
174 nrow(d)
175 plot(d$condition, d$latency, las= 3, ylab="latency")
176 plot(d$condition, log(d$latency), las= 3, ylab="log latency")
177 histogram(~ latency | condition, data=d, type="density")
178 shapiro.test((d[d$condition == "congruent",]$latency))
179 shapiro.test((d[d$condition == "incongruent",]$latency))
180
181 histogram(~ log(latency) | condition, data=d, type="density")
182 shapiro.test(log(d[d$condition == "congruent",]$latency))
183 shapiro.test(log(d[d$condition == "incongruent",]$latency))
184
185 plot(as.factor(paste(d$subject, d$condition)), d$latency, las= 3, ylab="latency")
186
187 # sneak preview
188 library(gplots)
189 plotmeans(z.log.latency ~ condition, data=d)
190
191
192 ############### fitting a model ####################
193 l <- lm(z.log.latency ~ condition, d)
194 l
195
196 # summarizing an lm()
197 summary(l)
198 coef(l)
199 str(l)
200
201 # the G&H way of summarizing an lm()
202 library(arm)
203 display(l)
204
205 # assessing sequential
206 anova(l)
207
208 # could it be that there are other problems?
209 # let's also look at some of the
210 # other measures of saccades to identify "odd" ones
211 plot(d$sacduration)
212 plot(d$peakvelocity)
213
214 # or at many variables at ones
215 library(scatterplot3d)
216 scatterplot3d(x=d$latency, y=d$sacduration, z=d$peakvelocity,
217 xlab="latency\n(in ms)", ylab="duration\n(in ms)", zlab="peak velocity", main="Properties of saccades",
218 highlight.3d=T, scale.y= 2, pch=19)
219
220 histogram(~ sacduration | subject, d)
221 histogram(~ peakvelocity | subject, d)
222
223 # remove outliers where saccades took longer than 100msecs
224 d.cl <- subset(d, sacduration < 100)
225
226 # rerun model and compare with model on bigger data set
227 l.cl <- lm(z.log.latency ~ condition, d.cl)
228 display(l)
229 display(l.cl)
230
231 # Is one of the data to be preferred based on how justified
232 # the assumptions of the model are?
233 # Let's look at the distribution of residuals
234 par(mfrow=c(2,2))
235 plot(density(resid(l)), main="Full data", sub="density plot", xlab="residuals", ylab="density")
236 qqnorm(resid(l), main="", sub="QQplot")
237 qqline(resid(l), col=2, lty=3)
238 plot(density(resid(l.cl)), main="Reduced data", sub="density plot", xlab="residuals", ylab="density", col=4)
239 qqnorm(resid(l.cl), main="", sub="QQplot", col=4)
240 qqline(resid(l.cl), col=2, lty=3)
241 par(mfrow=c(1,1))
242
243 ######################### OLS ##########################
244 # another way to fit linear regression models that comes
245 # with a lot of nice features
246 library(Design)
247 dd <- datadist(d)
248 options(datadist='dd')
249 ol <- ols(z.log.latency ~ condition, d)
250
251 # comparing the output of ols() and lm()
252 ol
253 summary(ol)
254 plot(ol)
255
256 ############################################
257 # further exploration and hypothesis testing
258 ############################################
259 cor(d[, c('latency','namedfreq','namedcdens')])
260 pairs(d[, c('latency','namedfreq','namedcdens')])
261
262 # we can define function for the panels above and below
263 # the diagonal and display whatever information we want
264 # here, we define a scatterplot with a lowess (locally fitted
265 # smoothed linear regressions) for the lower panels and
266 # the correlations coefficient between the two variables for
267 # the upper panel (to be plotted bigger for bigger values).
268 panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
269 {
270 usr <- par("usr"); on.exit(par(usr))
271 par(usr = c(0, 1, 0, 1))
272 r <- abs(cor(x, y))
273 txt <- format(c(r, 0.123456789), digits=digits)[1]
274 txt <- paste(prefix, txt, sep="")
275 if(missing(cex.cor)) cex <- 1/strwidth(txt)
276 text(0.5, 0.5, txt, cex = cex * r, col="#000099")
277 }
278 panel.smooth <- function (x, y, col = par("col"), bg = NA, pch = par("pch"),
279 cex = 1, col.smooth = "#000099", span = 2/3, iter = 3, ...)
280 {
281 points(x, y, pch = ".", col = col, bg = bg, cex = cex)
282 ok <- is.finite(x) & is.finite(y)
283 if (any(ok))
284 lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),
285 col = col.smooth, lwd=2, ...)
286 }
287 par(mar=c(1,.5,.5,1))
288 pairs(d[,c('latency','peakvelocity','sacduration','namedfreq','namedcdens')],
289 labels=c('saccade\nlatency','saccade\npeak velocity','saccade\nduration','log frequency\nof named','neighborhood\ndensity of named'),
290 lower.panel=panel.smooth, upper.panel=panel.cor)
291
292 d$z.namedfreq <- as.numeric(scale(d$namedfreq))
293 d$z.nameddens <- as.numeric(scale(d$namedcdens))
294 d$z.log.nameddens <- as.numeric(scale(log(d$namedcdens)))
295 dd.fd <- datadist(d)
296 options(datadist='dd.fd', digits=2)
297 ol.fd.c <- ols(z.log.latency ~ z.namedfreq + z.nameddens, d, subset= condition == "congruent")
298 ol.fd.c
299 ol.fd.i <- ols(z.log.latency ~ z.namedfreq + z.nameddens, d, subset= condition == "incongruent")
300 ol.fd.i
301
302 ol.fd <- ols(z.log.latency ~ condition * (z.namedfreq + z.nameddens), d)
303 ol.fd
304
305 # assessing importance of factors
306 # NB: anova() applied to ols() is non-sequential
307 # the F-statistics gives us a measure of the
308 # improvement of the model by the given predictor
309 # given that all other predictors are already in
310 # the model
311 anova(ol.fd)
312 fastbw(ol.fd)
313
314 ## and for lm()
315 #l.fd <- lm(z.log.latency ~ condition * (z.namedfreq + z.nameddens), d)
316 ## anova() applied to lm() tests sequential adding!
317 #anova(l.fd)
318 #drop1(l.fd)
319
320 ######################### plotting the model ##########
321 # open a window with 2 by 2 slots for plotting
322 par(mfrow=c(2,2))
323 plot(ol.fd)
324 par(mfrow=c(1,1))
325 attach(d)
326 p<- plot(ol.fd, z.namedfreq=NA, condition=NA)
327 datadensity(p, z.namedfreq, condition)
328 detach(d)
329 plot(ol.fd, z.namedfreq=NA, z.nameddens=NA, method="persp")
330 plot(ol.fd, z.namedfreq=NA, z.nameddens=NA, method="contour")
331 plot(ol.fd, z.namedfreq=NA, z.nameddens=NA, method="image")
332
333 dd.nl <- datadist(d)
334 options(datadist='dd.nl')
335 ol.nl <- ols(z.log.latency ~ condition * (rcs(z.namedfreq) + rcs(z.nameddens)), d)
336 ol.nl
337
338 # plotting
339 attach(d)
340 p <- plot(ol.nl, z.namedfreq=NA, ylab="predicted z-score of log latency", xlab="log frequency of named")
341 datadensity(p, z.namedfreq)
342 p <- plot(ol.nl, z.nameddens=NA, ylab="predicted z-score of log latency", xlab="neighborhood density of named")
343 datadensity(p, z.nameddens)
344
345 # additive effect of frequency and neighborhood density
346 plot(ol.nl, z.nameddens=NA, z.namedfreq=NA)
347 plot(ol.nl, z.nameddens=NA, z.namedfreq=NA, method="image")
348 detach(d)
349
350
351 ######################## individual differences ###########
352 # plot the effect by groups (e.g. by subject)
353 trellis.device(color=F)
354 xyplot(latency ~ condition | subject,
355 data=d,
356 main="By-subject LMs",
357 ylab="latency",
358 xlab="Congruency condition",
359 panel=function(x, y){
360 panel.xyplot(x, y, col=1)
361 panel.lmline(x, y, lty=2, col=2)
362 }
363 )
364
365 # or run a linear model for each subject and look
366 # at all the coefficients
367 d$log.latency <- log(d$latency)
368 ll <- lmList(log.latency ~ condition | subject, d)
369 ll
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.