attachment:LSA13-AuxLecture5-TimeSeriesData-eye-tracking-analysis.R of LSA2013Regression

   1 ## -----------------
   2 # functions
   3 ## -----------------
   4 myCenter = function(x) x - mean(x, na.rm=T)
   5 emplog = function(p, totalN) {
   6 	y = p*totalN
   7  	return(log( (y+.5)/ (totalN-y + .5) ))
   8 }
   9 emplogweight = function(p, totalN) {
  10  	y = p*totalN
  11 	return(1 / ( 1 / (y+.5) + 1 / (totalN-y + .5) ))
  12 }
  13 
  14 
  15 ## -----------------
  16 # load data
  17 ## -----------------
  18 setwd("C:\\Users\\tiflo\\Documents\\My LabMeetings, Tutorials, & Teaching\\LSA13\\latex\\scripts")
  19 d = read.csv(file = "eye-tracking-sample.csv")
  20 str(d)
  21 
  22 ## -----------------
  23 # analysis 1: ANOVA (collapsing over time)
  24 ## -----------------
  25 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
  26 		Subj = d$Subj,
  27 		CondWordFrequency = d$CondWordFrequency,
  28 		CondCompetitors = d$CondCompetitors
  29 	),
  30 	FUN = mean
  31 )
  32 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
  33 str(d.agg)
  34 
  35 # visualize
  36 with(d.agg, interaction.plot(CondCompetitors, CondWordFrequency, LooksToTarget,  
  37 	type="b", 
  38 	col=c(1:2), 
  39 	legend = F,
  40 #  	leg.bty="o", leg.bg="beige", 
  41 	lwd=2, 
  42 	pch=c(18,24),	
  43 	ylim= c(0,1),
  44    	xlab="Number of competitors", 
  45    	ylab="Proportion looks to target", 
  46    	main="Interaction Plot")
  47 )
  48 legend(1.5, 1, title= "Word Frequency", 
  49 	legend = c("high", "low"),
  50 	col=c(1:2), 
  51   	bty="o", 
  52 	bg="beige", 
  53 	lwd=2, 
  54 	pch=c(18,24)
  55 )
  56 
  57 # see: http://www.statmethods.net/stats/anova.html
  58 m.F1 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + 
  59 	Error(Subj/(CondWordFrequency * CondCompetitors)), 
  60 	data = d.agg)
  61 summary(m.F1)
  62 
  63 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
  64 		Item = d$Item,
  65 		CondWordFrequency = d$CondWordFrequency,
  66 		CondCompetitors = d$CondCompetitors
  67 	),
  68 	FUN = mean
  69 )
  70 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
  71 str(d.agg)
  72 
  73 m.F2 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + 
  74 	Error(Item/(CondWordFrequency * CondCompetitors)), 
  75 	data = d.agg)
  76 summary(m.F2)
  77 
  78 ## -----------------
  79 # analysis 2: ANOVA (collapsing over time) - arcsine
  80 ## -----------------
  81 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
  82 		Subj = d$Subj,
  83 		CondWordFrequency = d$CondWordFrequency,
  84 		CondCompetitors = d$CondCompetitors
  85 	),
  86 	FUN = function(x) { asin(sqrt(mean(x))) }
  87 )
  88 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
  89 str(d.agg)
  90 
  91 m.F1 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + 
  92 	Error(Subj/(CondWordFrequency * CondCompetitors)), 
  93 	data = d.agg)
  94 summary(m.F1)
  95 
  96 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
  97 		Item = d$Item,
  98 		CondWordFrequency = d$CondWordFrequency,
  99 		CondCompetitors = d$CondCompetitors
 100 	),
 101 	FUN = function(x) { asin(sqrt(mean(x))) }
 102 )
 103 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
 104 str(d.agg) 
 105 
 106 m.F2 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + 
 107 	Error(Item/(CondWordFrequency * CondCompetitors)), 
 108 	data = d.agg)
 109 summary(m.F2)
 110 
 111 
 112 ## -----------------
 113 # analysis 3: mixed linear model
 114 # 
 115 # note: aggregation over subject and items combined
 116 ## -----------------
 117 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
 118 		Subj = d$Subj,
 119 		Item = d$Item,
 120 		CondWordFrequency = d$CondWordFrequency,
 121 		CondCompetitors = d$CondCompetitors
 122 	),
 123 	FUN = mean
 124 )
 125 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
 126 
 127 d.agg$cCondWordFrequencyHigh = myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
 128 d.agg$cCondCompetitorsTwo = myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
 129 
 130 # model with full random effect structure
 131 library(lme4)
 132 m.full = lmer(LooksToTarget ~ 
 133 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 134 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Subj) + 
 135 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), 
 136 	d.agg
 137 )
 138 m.full
 139 
 140 
 141 ## -----------------
 142 # analysis 5: empirical logit
 143 # 
 144 # note: aggregation over subject and items combined
 145 ## -----------------
 146 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
 147 		Subj = d$Subj,
 148 		Item = d$Item,
 149 		CondWordFrequency = d$CondWordFrequency,
 150 		CondCompetitors = d$CondCompetitors
 151 	),
 152 	FUN = mean
 153 )
 154 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
 155 
 156 TotalLooks = aggregate(d[,c('LooksToTarget')], by= list(
 157 		Subj = d$Subj,
 158 		Item = d$Item,
 159 		CondWordFrequency = d$CondWordFrequency,
 160 		CondCompetitors = d$CondCompetitors
 161 	),
 162 	FUN = length
 163 )
 164 
 165 d.agg$cCondWordFrequencyHigh = 
 166     myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
 167 d.agg$cCondCompetitorsTwo = 
 168     myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
 169 
 170 # model with full random effect structure
 171 m.full = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 172 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 173 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Subj) + 
 174 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), 
 175 	d.agg, 
 176 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 177 )
 178 m.full
 179 # deviance 1191ish
 180 
 181 # model with only random intercepts
 182 m.simple = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 183 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 184 	(1 | Subj) + 
 185 	(1 | Item), 
 186 	d.agg,
 187 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 188 )
 189 m.simple
 190 anova(m.full,m.simple)
 191 # deviance 1209, so there is room for improvement beyond the intercepts
 192 
 193 # model with full random effect structure for item
 194 m.full.item.1 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 195 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 196 	(1 + cCondWordFrequencyHigh + cCondCompetitorsTwo | Subj) + 
 197 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), 
 198 	d.agg, 
 199 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 200 )
 201 
 202 m.full.item.2 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 203 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 204 	(1 +  cCondWordFrequencyHigh | Subj) + 
 205 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), 
 206 	d.agg, 
 207 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 208 )
 209 
 210 m.full.item.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 211 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 212 	(1 + cCondCompetitorsTwo | Subj) + 
 213 	(1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), 
 214 	d.agg, 
 215 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 216 )
 217 
 218 # comparison of the above models in terms of their deviance suggests
 219 # that the only justified random slope for subjects is one for 
 220 # competitors. The resulting model is just as good as the full model.
 221 
 222 m.1.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 223 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 224 	(1 + cCondCompetitorsTwo | Subj) + 
 225 	(1 + cCondWordFrequencyHigh + cCondCompetitorsTwo | Item), 
 226 	d.agg, 
 227 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 228 )
 229 anova(m.full.item.3, m.1.3)
 230 
 231 
 232 m.2.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 233 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 234 	(1 + cCondCompetitorsTwo | Subj) + 
 235 	(1 + cCondCompetitorsTwo  | Item), 
 236 	d.agg,
 237 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 238 )
 239 anova(m.full.item.3, m.2.3, m.1.3)
 240 
 241 m.3.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 242 	cCondWordFrequencyHigh * cCondCompetitorsTwo + 
 243 	(1 + cCondCompetitorsTwo | Subj) + 
 244 	(1 + cCondWordFrequencyHigh | Item), 
 245 	d.agg, 
 246 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 247 )
 248 anova(m.full.item.3, m.3.3, m.1.3)
 249 
 250 # m.1.3 seems the best model
 251 
 252 
 253 ## -----------------
 254 # analysis 4: logit
 255 # 
 256 # note: no aggregation. This analysis would yield inflated confidence. 
 257 # It is hence anti-consersvative
 258 ## -----------------
 259 
 260 
 261 d.agg$cCondWordFrequencyHigh = 
 262     myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
 263 d.agg$cCondCompetitorsTwo = 
 264     myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
 265 
 266 # full model (do not run, because it takes a long time to fit
 267 # since the data were artificial created in a way that means 
 268 # that the full model is unjustified
 269 # m.full = lmer(LooksToTarget ~ CondWordFrequency * CondCompetitors +
 270 #	(1 + CondWordFrequency * CondCompetitors | Subj) +
 271 #	(1 + CondWordFrequency * CondCompetitors | Item),
 272 #	data = d,
 273 #	family = "binomial"
 274 # )
 275 # m.full
 276 #
 277 # Deviance: 39968
 278 # save(m.full, file = "eye-tracking-full-model.Rdata", compress=T)
 279 
 280 
 281 # reducing random effects
 282 m.simple = glmer(LooksToTarget ~ cCondWordFrequencyHigh * cCondCompetitorsTwo +
 283 	(1 | Subj) +
 284 	(1 | Item),
 285 	data = d,
 286 	family = "binomial"
 287 )
 288 m.simple
 289 
 290 
 291 
 292 
 293 ## -----------------
 294 # Now let's take time into consideration:
 295 #
 296 # There are two common ways to analyze this type of 
 297 # binomial time series data:
 298 #
 299 # 1) weighted linear regression over empirical logits
 300 # 2) binomial logit regression
 301 #
 302 # Binned data (for Barr type emp. logit analysis)
 303 ## -----------------
 304 
 305 d.agg = aggregate(d[,c('LooksToTarget')], by= list(
 306 		Subj = d$Subj,
 307 		Item = d$Item,
 308 		CondWordFrequency = d$CondWordFrequency,
 309 		CondCompetitors = d$CondCompetitors,
 310 		Bin = d$Bin
 311 	),
 312 	FUN = mean,
 313 	nfrequency = T
 314 )
 315 names(d.agg)[length(names(d.agg))] = "LooksToTarget"
 316 d.agg$cBin = myCenter(d.agg$Bin)
 317 
 318 # plot
 319 library(ggplot2)
 320 library(rms)
 321 ggplot(d.agg, aes(x=Bin, 
 322 		y= LooksToTarget, 
 323 		col= as.factor(CondWordFrequency), 
 324 		linetype=as.factor(CondCompetitors)
 325 	    )
 326 	) +
 327 	geom_smooth(size=1) +
 328 	geom_smooth(method=lm, 
 329 		formula= y ~ pol(x, 2),
 330 		family="binomial",
 331 		size = 2) +
 332 	xlab("Time bin") +
 333 	ylab("Proportion of looks to the target") +
 334 	scale_color_discrete("Word Frequency") +
 335 	scale_linetype_discrete("Competitors") 
 336 
 337 ggplot(d.agg, aes(x=Bin, 
 338 		y= LooksToTarget, 
 339 		col= as.factor(CondWordFrequency), 
 340 		shape= as.factor(CondCompetitors),
 341 		linetype=as.factor(CondCompetitors)
 342 	    )
 343 	) +
 344 	geom_point(alpha=.05) +
 345 	geom_smooth(size=1) +
 346 	geom_smooth(method=lm, 
 347 		formula= y ~ pol(x, 2),
 348 		family="binomial",
 349 		size = 2) +
 350 	xlab("Time bin") +
 351 	ylab("Proportion of looks to the target") +
 352 	scale_color_discrete("Word Frequency") +
 353 	scale_linetype_discrete("Competitors") +
 354 	scale_shape_discrete("Competitors")
 355 
 356 
 357 
 358 d.agg$cCondWordFrequencyHigh = 
 359     myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0))
 360 d.agg$cCondCompetitorsTwo = 
 361     myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0))
 362 lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 363 	cCondWordFrequencyHigh * cCondCompetitorsTwo * 
 364 	cBin + 
 365 	(1 | Subj) + (1 | Item), 
 366 	d.agg, 
 367 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 368 )
 369 
 370 # with quadratic effect of bin
 371 lmer(emplog(LooksToTarget, TotalLooks$x) ~ 
 372 	cCondWordFrequencyHigh * cCondCompetitorsTwo * 
 373 	pol(cBin, 2) + 
 374 	(1 | Subj) + (1 | Item), 
 375 	d.agg, 
 376 	weight = emplogweight(LooksToTarget, TotalLooks$x)
 377 )
 378 
 379 
 380 
 381 
 382 
 383 
 384 #  2) logit
 385 d$cCondWordFrequencyHigh = 
 386     myCenter(ifelse(d$CondWordFrequency == "high", 1, 0))
 387 d$cCondCompetitorsTwo = 
 388     myCenter(ifelse(d$CondCompetitors == "two", 1, 0))
 389 
 390 
 391 # with quadratic effect of time
 392 glmer(LooksToTarget ~ 
 393 	cCondWordFrequencyHigh * cCondCompetitorsTwo * 
 394 	pol(cTime, 2) + 
 395 	(1 | Subj) + (1 | Item), 
 396 	d, family = "binomial"
 397 )
Attachment 'LSA13-AuxLecture5-TimeSeriesData-eye-tracking-analysis.R'

Attached Files