library(languageR)

## ---------------------------------------------------
# Lexical decision time data
#
# Q: To what extent are lexical decisions influenced by
#    (a) properties of the participants, (b) words (items) 
#    used in the experiment, (c) design properties of the 
#    experiment, or (d) more general factors?
## ---------------------------------------------------
data(lexdec)
library(lme4, keep.source = FALSE)
library(arm)

lm.simp <- lm(RT ~ 1, data=lexdec)
display(lm.simp)

lm.1 <- lm(RT ~ meanWeight + Frequency, data=lexdec)
summary(lm.1)
vif(lm.1)

lm.2 <- lm(RT ~ 1 + Length + meanWeight + Frequency + FamilySize + SynsetCount
		   , data=lexdec)
summary(lm.2)

# comparison of models
anova(lm.2, lm.1, lm.simp)

lexdec$SubjFreqRatio <- lexdec$SubjFreq - lexdec$Frequency
lm.full <- lm(RT ~ 1 + meanWeight + Frequency + Length + DerivEntropy + SubjFreqRatio
			, data= lexdec)
summary(lm.full)
vif(lm.full)

cor(lexdec[,c('meanWeight','Frequency','Length','DerivEntropy','SubjFreqRatio')])

lm.full <- lm(RT ~ 1 + Frequency + SubjFreqRatio
			, data= lexdec)
summary(lm.full)

## ---------------------------------------------------
# investigate the factors in the data set and build a good model 
# ONLY USING LINGUISTIC FACTORS
## ---------------------------------------------------

## ---------------------------------------------------
# Do subjects differ?
## ---------------------------------------------------

ll.1 <- lmList(RT ~ 1 + Frequency + SubjFreqRatio | Subject, data=lexdec)
ll.1
str(ll.1)

# a little function to calculate the standard error of a 
# sample (in a vector)
se <- function(x) {
	sqrt(var(x)) / sqrt(length(x))
}

# coef and standard error of coef from each model
paste("[", sapply(ll.1, coef), ", ", sapply(ll.1, se.coef), "]", sep="")

# coef and standard error across model
paste("[", mean(sapply(ll.1, coef)[2,]), ", ", se(sapply(ll.1, se.coef)[,2]), "]", sep="")


# Trellis plot of individual participant coefficients, xyplot
trellis.device(color=F)
xyplot(RT ~ Frequency | Subject,
	data=lexdec, 
	main="By-subject LMs",
	ylab="log reaction times",
	xlab="",
#	ylim= c(-200,200),
	panel=function(x, y){
		panel.xyplot(x, y, col=1)
		panel.lmline(x, y, lty=4, col="blue", lwd=3)
	}	
)

old.prompt = grid::grid.prompt(T)
qqmath( ~ RT | Subject, data= lexdec, layout=c(4,4),
       prepanel = prepanel.qqmathline,
       panel = function(x, ...) {
          panel.qqmathline(x, col= 2, lty=1, ...)
          panel.qqmath(x, ...)
       }
)
grid::grid.prompt(old.prompt)

xylowess.fnc(RT ~ Frequency | Subject, data=lexdec)


## ---------------------------------------------------
# multilevel (aka mixed) model
## ---------------------------------------------------

lmer.1 <- lmer(RT ~ 1 + Frequency + SubjFreqRatio +
		    (1 | Subject)
		   , data=lexdec)
summary(lmer.1)

# the random effects
ranef(lmer.1)
var(ranef(lmer.1)[[1]])


## ---------------------------------------------------
# R2
## ---------------------------------------------------
summary(lm.full)
cor(fitted(lm.full), lexdec$RT)^ 2

cor(fitted(lmer.1), lexdec$RT) ^ 2


## ---------------------------------------------------
# use R2 comparisons to investigate
# item effects
# design effects
# effects of subject differences
## ---------------------------------------------------

lmer.item <- lmer(RT ~ 1 + Frequency + SubjFreqRatio +
		    (1 | Word) + (1 | Subject)
		   , data=lexdec)
cor(fitted(lmer.item), lexdec$RT) ^ 2


anova(lmer.1, lmer.item)




lmer.2 <- lmer(RT ~ 1 + Frequency + SubjFreqRatio +
		    (1 | Word) + (1 + Frequency | Subject)
		   , data=lexdec)
lmer.2








## ---------------------------------------------------
# full models
## ---------------------------------------------------

lmer.full = lmer(RT ~ 1 + Correct + Trial + PrevType * meanWeight + 
   Frequency + NativeLanguage * Length + (1|Subject) + (1|Word), 
   data = lexdec)
anova(lmer.full, lmer.1)

p <- pvals.fnc(lmer.full, withMCMC=T)
p$summary




lmer.4 <- lmer(RT ~ 1 + Correct + PrevCorrect + Trial + PrevType + meanWeight + 
   Frequency + NativeLanguage * Length + (1|Subject) + (1|Word), 
   data = lexdec)
lmer.4
anova(lmer.full, lmer.1)