## @knitr Options, include=F, cache=F, message=F
opts_chunk$set(size='footnotesize', eval=T, prompt=F, message=F, tidy=F, fig.width=4, fig.height=4)


## @knitr stargazerexamples, echo=F, results='asis', cache=T
library(languageR)
library(stargazer)
data(lexdec)
lexdec$CorrectResponse = with(lexdec, Correct == "correct")
g1a = lm(RT ~ Frequency * NativeLanguage + Trial, data=lexdec)
g1b = lm(RT ~ Frequency * NativeLanguage, data=lexdec)
g2 = glm(CorrectResponse ~ Frequency * NativeLanguage, data=lexdec, family=binomial)
stargazer(g1a,g1b,g2,
  align=TRUE, 
  dep.var.labels=c("(logged) RT\n","Correct response?"),
  covariate.labels=c("Intercept", "Word frequency (logged)","Native language","Trial position","Word frequency (logged):Native language"),
  omit.stat=c("LL","ser","f"),
  intercept.top=T, 
  title="Example Stargazer table generated from R"
)


## @knitr Preliminaries, echo=T, results='markup', cache=T, message=F
version
ls()


## @knitr linearcombination, echo=F, results='markup', cache=T
d = read.csv("scripts/discourse-info.tab", sep="\t", header=T)
d = d[sample(1:nrow(d), 8),]
print(d)


## @knitr linearcombination2, echo=F, results='markup', cache=T
print(d)


## @knitr LoadData1, echo=F, results='markup', cache=T, message=F
library(languageR)
data(lexdec)
# randomly sampling rows from lexdec
head(lexdec[sample(x=1:nrow(lexdec), size=20, replace=F),c(1:5, 9:10)])


## @knitr lexdec, echo=T, eval=F, cache=T, message=F
library(languageR)
data(lexdec)

# Try some of the following:
help(lexdec)      # learn about this R object
?lexdec           # the same
class(lexdec)     # what type of R object is lexdec?
nrow(lexdec)      # number of rows (cases)
str(lexdec)       # works on almost all R objects
summary(lexdec)   # summary of each variable in the data.frame
summary(lexdec$RT)
mean(lexdec$RT)
var(lexdec$RT)
with(lexdec, mean(RT))

# create a new data.frame that is a subset of lexdec
new = subset(lexdec, RT > 7)   
nrow(new)
summary(new$RT)


## @knitr IllustrateData1, echo=T, results='markup', cache=T, message=F
summary(lexdec[,c('RT','Frequency')])


## @knitr LinearModelExample1, echo=T, results='markup', cache=T, message=F
# glm() is R's call to a GLM
# 1, in an R-formula, is a specific symbol for the intercept
# Frequency and RT are variables in the data.frame lexdec
# data tells glm which data.frame to use
# family tells glm which distributions the outcome is assumed to have
m = glm(RT ~ 1 + Frequency, data = lexdec, family = gaussian)


## @knitr LinearModelExample1b, echo=T, results='hide', cache=T, message=F
library(languageR)
data(lexdec)
glm(RT ~ 1 + Frequency, data = lexdec, family = gaussian)
glm(RT ~ Frequency, data = lexdec, family = gaussian)
glm(RT ~ Frequency, data = lexdec)


## @knitr LinearModelExample1c, echo=T, eval=F, cache=T, message=F
glm(RT ~ - 1 + Frequency, data = lexdec)


## @knitr LinearModelExample2, echo=T, results='markup', cache=T, message=F, size='scriptsize'
summary(m)$coefficients[,1:2]
sqrt(summary(m)$dispersion)


## @knitr LinearModelInterceptExample1, echo=T, results='markup', cache=T, message=F
m0 = glm(RT ~ 1, data = lexdec, family = gaussian)


## @knitr LinearModelInterceptExample2, echo=T, results='markup', cache=T, message=F
summary(m0)$coefficients[,1:2]


## @knitr mean, echo=T, results='markup', cache=T, message=F
options(digits=6)
mean(lexdec$RT)


## @knitr LinearModelInterceptVisualization1, echo=T, results='markup', cache=T, fig.height=2, fig.width=3
par(cex=.7, mar=c(4,4,0.1,0.1))
plot(x = lexdec$Frequency, 
     y = lexdec$RT, 
     ylab = "(log-transformed) RT", 
     xlab = "(log-transformed) word frequency", 
     type = "n"
)
points(x = lexdec$Frequency, y = lexdec$RT, pch=1, cex=.1, col = "grey")
abline(m0, col = "magenta")


## @knitr LinearModelInterceptVisualization2, echo=F, results='markup', cache=T, fig.height=1.5, fig.width=3
par(cex=.65, mar=c(4,4,0.5,0.1))
plot(x = exp(lexdec$Frequency), 
     y = lexdec$RT, 
     ylab = "(log-transformed) RT", 
     xlab = "word frequency", 
     type = "n"
)
points(x = exp(lexdec$Frequency), y = lexdec$RT, pch=1, cex=.1, col = "grey")
o = order(exp(lexdec$Frequency))
lines(x = exp(lexdec$Frequency[o]), y = predict(m0)[o], col = "magenta")

plot(x = exp(lexdec$Frequency), 
     y = exp(lexdec$RT), 
     ylab = "RT", 
     xlab = "word frequency", 
     type = "n"
)
points(x = exp(lexdec$Frequency), y = exp(lexdec$RT), pch=1, cex=.1, col = "grey")
lines(x = exp(lexdec$Frequency[o]), y = exp(predict(m0)[o]), col = "magenta")


## @knitr LinearModelVisualization1, echo=F, results='markup', cache=T, fig.height=2, fig.width=3
par(cex=.7, mar=c(4,4,1,0.1))
plot(x = lexdec$Frequency, 
     y = lexdec$RT, 
     ylab = "(log-transformed) RT", 
     xlab = "(log-transformed) word frequency", 
     type = "n"
)
points(x = lexdec$Frequency, y = lexdec$RT, pch=1, cex=.1, col = "grey")
abline(m, col = "green")


## @knitr LinearModelVisualization2, echo=F, results='markup', cache=T, fig.height=1.5, fig.width=3
par(cex=.65, mar=c(4,4,0.5,0.1))
plot(x = exp(lexdec$Frequency), 
     y = lexdec$RT, 
     ylab = "(log-transformed) RT", 
     xlab = "word frequency", 
     type = "n"
)
points(x = exp(lexdec$Frequency), y = lexdec$RT, pch=1, cex=.1, col = "grey")
lines(x = exp(lexdec$Frequency[o]), y = predict(m)[o], col = "green")

plot(x = exp(lexdec$Frequency), 
     y = exp(lexdec$RT), 
     ylab = "RT", 
     xlab = "word frequency", 
     type = "n"
)
points(x = exp(lexdec$Frequency), y = exp(lexdec$RT), pch=1, cex=.1, col = "grey")
lines(x = exp(lexdec$Frequency[o]), y = exp(predict(m)[o]), col = "green")


## @knitr LinearModelExample3, echo=F, results='markup', cache=T, message=F
summary(m)$coefficients


## @knitr LinearModelExample4, echo=T, results='markup', cache=T, message=F
summary(m)


## @knitr LinearModelTwoPredictorsExample1, echo=T, results='markup', cache=T, message=F
summary(glm(RT ~ Frequency + Trial, data = lexdec))$coefficients


## @knitr LinearModelTwoPredictorsVisualziation1, echo=F, results='markup', cache=T, message=F, fig.width=3.5, fig.height=3.5
library(scatterplot3d)
m2 = lm(RT ~ Frequency + Trial, data = lexdec)
par(cex = .8, cex.lab = .6)
s = scatterplot3d(x=lexdec$Frequency, y=lexdec$Trial, 
  z=lexdec$RT,
	xlab="Word Frequency\n(log-transformed)", 
	ylab="Trial number",
	zlab="Response latency\n(in log-transformed msecs)", 
	main="Predicting Lexical Decision RTs"
)
s$plane3d(m2, col= "purple", lwd=2, lty.box = "solid")


## @knitr LinearModelThreePredictorsExample1, echo=T, results='markup', cache=T, message=F
s = summary(glm(RT ~ Frequency + Trial + NativeLanguage, data = lexdec))
s$coefficients


## @knitr LinearModelThreePredictorsVisualziation1, echo=F, results='markup', cache=T, message=F, fig.width=3.5, fig.height=3
m3 = lm(RT ~ Frequency + Trial + NativeLanguage, data = lexdec)

par(cex = .8, cex.lab = .6)
s = scatterplot3d(x=lexdec$Frequency, y=lexdec$Trial, 
  z=lexdec$RT,
  xlab="Word Frequency\n(log-transformed)", 
	ylab="Trial number",
	zlab="Response latency\n(in log-transformed msecs)", 
	main="Predicting Lexical Decision RTs",
  sub="Native Speakers (red) and\nNon-Native Speakers (purple)"
)
s$plane3d(coef(m3)[1], coef(m3)[2], coef(m3)[3], 
	col= "red", lwd=2, 
	lty.box = "solid")
s$plane3d(coef(m3)[1] + coef(m3)[4], coef(m3)[2], coef(m3)[3], 
	col= "purple", lwd=2, 
	lty.box = "solid")


## @knitr LinearModelInteractionExample1, echo=T, results='markup', cache=T, message=F
m4 = glm(RT ~ 
           Frequency + 
           Trial + 
           NativeLanguage + 
           Frequency:NativeLanguage, 
         data = lexdec
)
summary(m4)$coefficients


## @knitr LinearModelInteractionVisualziation1, echo=F, results='markup', cache=T, message=F, fig.width=3.5, fig.height=3
m4 = lm(RT ~ Frequency + Trial + NativeLanguage + Frequency:NativeLanguage, data = lexdec)

par(cex = .8, cex.lab = .6)
s = scatterplot3d(x=lexdec$Frequency, y=lexdec$Trial, 
  z=lexdec$RT,
  xlab="Word Frequency\n(log-transformed)", 
  ylab="Trial number",
	zlab="Response latency\n(in log-transformed msecs)", 
	main="Predicting Lexical Decision RTs",
  sub="Interaction with Native Speakers (red) and\nNon-Native Speakers (purple)"
)
s$plane3d(coef(m4)[1], coef(m4)[2], coef(m4)[3], 
	col= "red", lwd=2, 
	lty.box = "solid")
s$plane3d(coef(m4)[1] + coef(m4)[4], coef(m4)[2] + coef(m4)[5], coef(m4)[3], 
	col= "purple", lwd=2, 
	lty.box = "solid")


## @knitr LinearModelInteractionExample2, echo=F, results='markup', cache=T, message=F
summary(m4)$coefficients