## ----------------- # functions ## ----------------- myCenter = function(x) x - mean(x, na.rm=T) emplog = function(p, totalN) { y = p*totalN return(log( (y+.5)/ (totalN-y + .5) )) } emplogweight = function(p, totalN) { y = p*totalN return(1 / ( 1 / (y+.5) + 1 / (totalN-y + .5) )) } ## ----------------- # load data ## ----------------- setwd("C:\\Users\\tiflo\\Documents\\My LabMeetings, Tutorials, & Teaching\\LSA13\\latex\\scripts") d = read.csv(file = "eye-tracking-sample.csv") str(d) ## ----------------- # analysis 1: ANOVA (collapsing over time) ## ----------------- d.agg = aggregate(d[,c('LooksToTarget')], by= list( Subj = d$Subj, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = mean ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" str(d.agg) # visualize with(d.agg, interaction.plot(CondCompetitors, CondWordFrequency, LooksToTarget, type="b", col=c(1:2), legend = F, # leg.bty="o", leg.bg="beige", lwd=2, pch=c(18,24), ylim= c(0,1), xlab="Number of competitors", ylab="Proportion looks to target", main="Interaction Plot") ) legend(1.5, 1, title= "Word Frequency", legend = c("high", "low"), col=c(1:2), bty="o", bg="beige", lwd=2, pch=c(18,24) ) # see: http://www.statmethods.net/stats/anova.html m.F1 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + Error(Subj/(CondWordFrequency * CondCompetitors)), data = d.agg) summary(m.F1) d.agg = aggregate(d[,c('LooksToTarget')], by= list( Item = d$Item, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = mean ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" str(d.agg) m.F2 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + Error(Item/(CondWordFrequency * CondCompetitors)), data = d.agg) summary(m.F2) ## ----------------- # analysis 2: ANOVA (collapsing over time) - arcsine ## ----------------- d.agg = aggregate(d[,c('LooksToTarget')], by= list( Subj = d$Subj, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = function(x) { asin(sqrt(mean(x))) } ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" str(d.agg) m.F1 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + Error(Subj/(CondWordFrequency * CondCompetitors)), data = d.agg) summary(m.F1) d.agg = aggregate(d[,c('LooksToTarget')], by= list( Item = d$Item, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = function(x) { asin(sqrt(mean(x))) } ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" str(d.agg) m.F2 = aov(LooksToTarget ~ 1 + CondWordFrequency * CondCompetitors + Error(Item/(CondWordFrequency * CondCompetitors)), data = d.agg) summary(m.F2) ## ----------------- # analysis 3: mixed linear model # # note: aggregation over subject and items combined ## ----------------- d.agg = aggregate(d[,c('LooksToTarget')], by= list( Subj = d$Subj, Item = d$Item, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = mean ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" d.agg$cCondWordFrequencyHigh = myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0)) d.agg$cCondCompetitorsTwo = myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0)) # model with full random effect structure library(lme4) m.full = lmer(LooksToTarget ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Subj) + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), d.agg ) m.full ## ----------------- # analysis 5: empirical logit # # note: aggregation over subject and items combined ## ----------------- d.agg = aggregate(d[,c('LooksToTarget')], by= list( Subj = d$Subj, Item = d$Item, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = mean ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" TotalLooks = aggregate(d[,c('LooksToTarget')], by= list( Subj = d$Subj, Item = d$Item, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors ), FUN = length ) d.agg$cCondWordFrequencyHigh = myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0)) d.agg$cCondCompetitorsTwo = myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0)) # model with full random effect structure m.full = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Subj) + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) m.full # deviance 1191ish # model with only random intercepts m.simple = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 | Subj) + (1 | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) m.simple anova(m.full,m.simple) # deviance 1209, so there is room for improvement beyond the intercepts # model with full random effect structure for item m.full.item.1 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondWordFrequencyHigh + cCondCompetitorsTwo | Subj) + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) m.full.item.2 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondWordFrequencyHigh | Subj) + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) m.full.item.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondCompetitorsTwo | Subj) + (1 + cCondWordFrequencyHigh * cCondCompetitorsTwo | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) # comparison of the above models in terms of their deviance suggests # that the only justified random slope for subjects is one for # competitors. The resulting model is just as good as the full model. m.1.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondCompetitorsTwo | Subj) + (1 + cCondWordFrequencyHigh + cCondCompetitorsTwo | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) anova(m.full.item.3, m.1.3) m.2.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondCompetitorsTwo | Subj) + (1 + cCondCompetitorsTwo | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) anova(m.full.item.3, m.2.3, m.1.3) m.3.3 = lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 + cCondCompetitorsTwo | Subj) + (1 + cCondWordFrequencyHigh | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) anova(m.full.item.3, m.3.3, m.1.3) # m.1.3 seems the best model ## ----------------- # analysis 4: logit # # note: no aggregation. This analysis would yield inflated confidence. # It is hence anti-consersvative ## ----------------- d.agg$cCondWordFrequencyHigh = myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0)) d.agg$cCondCompetitorsTwo = myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0)) # full model (do not run, because it takes a long time to fit # since the data were artificial created in a way that means # that the full model is unjustified # m.full = lmer(LooksToTarget ~ CondWordFrequency * CondCompetitors + # (1 + CondWordFrequency * CondCompetitors | Subj) + # (1 + CondWordFrequency * CondCompetitors | Item), # data = d, # family = "binomial" # ) # m.full # # Deviance: 39968 # save(m.full, file = "eye-tracking-full-model.Rdata", compress=T) # reducing random effects m.simple = glmer(LooksToTarget ~ cCondWordFrequencyHigh * cCondCompetitorsTwo + (1 | Subj) + (1 | Item), data = d, family = "binomial" ) m.simple ## ----------------- # Now let's take time into consideration: # # There are two common ways to analyze this type of # binomial time series data: # # 1) weighted linear regression over empirical logits # 2) binomial logit regression # # Binned data (for Barr type emp. logit analysis) ## ----------------- d.agg = aggregate(d[,c('LooksToTarget')], by= list( Subj = d$Subj, Item = d$Item, CondWordFrequency = d$CondWordFrequency, CondCompetitors = d$CondCompetitors, Bin = d$Bin ), FUN = mean, nfrequency = T ) names(d.agg)[length(names(d.agg))] = "LooksToTarget" d.agg$cBin = myCenter(d.agg$Bin) # plot library(ggplot2) library(rms) ggplot(d.agg, aes(x=Bin, y= LooksToTarget, col= as.factor(CondWordFrequency), linetype=as.factor(CondCompetitors) ) ) + geom_smooth(size=1) + geom_smooth(method=lm, formula= y ~ pol(x, 2), family="binomial", size = 2) + xlab("Time bin") + ylab("Proportion of looks to the target") + scale_color_discrete("Word Frequency") + scale_linetype_discrete("Competitors") ggplot(d.agg, aes(x=Bin, y= LooksToTarget, col= as.factor(CondWordFrequency), shape= as.factor(CondCompetitors), linetype=as.factor(CondCompetitors) ) ) + geom_point(alpha=.05) + geom_smooth(size=1) + geom_smooth(method=lm, formula= y ~ pol(x, 2), family="binomial", size = 2) + xlab("Time bin") + ylab("Proportion of looks to the target") + scale_color_discrete("Word Frequency") + scale_linetype_discrete("Competitors") + scale_shape_discrete("Competitors") d.agg$cCondWordFrequencyHigh = myCenter(ifelse(d.agg$CondWordFrequency == "high", 1, 0)) d.agg$cCondCompetitorsTwo = myCenter(ifelse(d.agg$CondCompetitors == "two", 1, 0)) lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo * cBin + (1 | Subj) + (1 | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) # with quadratic effect of bin lmer(emplog(LooksToTarget, TotalLooks$x) ~ cCondWordFrequencyHigh * cCondCompetitorsTwo * pol(cBin, 2) + (1 | Subj) + (1 | Item), d.agg, weight = emplogweight(LooksToTarget, TotalLooks$x) ) # 2) logit d$cCondWordFrequencyHigh = myCenter(ifelse(d$CondWordFrequency == "high", 1, 0)) d$cCondCompetitorsTwo = myCenter(ifelse(d$CondCompetitors == "two", 1, 0)) # with quadratic effect of time glmer(LooksToTarget ~ cCondWordFrequencyHigh * cCondCompetitorsTwo * pol(cTime, 2) + (1 | Subj) + (1 | Item), d, family = "binomial" )