data(warlpiri, package="languageR")

##--------------------------------------------
# This data set documents the use of ergative 
# case marking in the narratives of native speakers 
# of Lajamanu Warlpiri (8 children, 13 adults) 
# describing events in picture books. 
#
# O'Shannessy, C. (2006) Language contact and 
# child bilingual acquisition: Learning a mixed 
# language and Warlpiri in northern Australia, 
# PhD Thesis, University of Sydney, Australia. 
##--------------------------------------------

help(warlpiri, package="languageR")
str(warlpiri)
# NOTE(review): attach() is discouraged in modern R (it can silently
# mask other objects), but every later model formula in this script
# relies on the attached column names, so it is kept here.
attach(warlpiri)

## this is an unbalanced data set, so we have to
#  be especially carefully to test for collinearity
#  (b/c it's basically bound to be present)
##
# Inspect the grouping factor: how many speakers, and how many
# observations each contributes (unbalanced counts expected).
levels(Speaker)
table(Speaker)

# word order and case marking is also distributed
# heterogeneously 
summary(warlpiri)

##--------------------------------------------
# understanding the data
##--------------------------------------------
# With two factors, plot() produces spineplots: the response split
# by each candidate predictor in turn.
plot(WordOrder, AnimacyOfSubject)
plot(WordOrder, AnimacyOfObject)
plot(WordOrder, OvertnessOfObject)

##--------------------------------------------
# Let's have a look at the predictors 
##--------------------------------------------
library(languageR)
library(rpart)

# Grow a classification tree for WordOrder from the remaining columns.
# Columns 1, 2, 5 and 9 are dropped from the model frame -- presumably
# identifiers/outcome-related columns; confirm against names(warlpiri).
w.rp <- rpart(WordOrder ~ ., data = warlpiri[, -c(1, 2, 5, 9)])
w.rp
# Complexity-parameter plot: pick the cp value where the cross-validated
# error bottoms out, to use for pruning below.
plotcp(w.rp)

# Prune at cp = 0.021 (read off the plotcp() display) and draw the tree.
w.pruned <- prune(w.rp, cp = 0.021)
plot(w.pruned, margin = 0.1, compress = FALSE)
text(w.pruned, use.n = TRUE, pretty = 0, cex = 0.8, fancy = FALSE)

##--------------------------------------------
# lrm() in Design is a convenient interface for 
# logistic regression (also glm())
##--------------------------------------------
# NOTE(review): the Design package has long been superseded by 'rms'
# (same author); lrm(), datadist(), vif() and anova() exist there
# under the same names.
library(Design)
# Binary logistic regression of word order on the three factors;
# the predictors come from the attach()ed warlpiri data frame.
wo.lr <- lrm(WordOrder ~ AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject)
# Variance inflation factors -- check the collinearity concern
# raised for this unbalanced data set.
vif(wo.lr)

# non-sequential test of factor impacts
anova(wo.lr)

##---------------------------------------------
# plotting ... just like for ols
# (both are Design functions)
##---------------------------------------------
# datadist() records each predictor's distribution; the Design/rms
# plot methods look it up via options("datadist") to choose default
# ranges and adjust-to values.
wo.dd <- datadist(warlpiri)
options(datadist = 'wo.dd')

# Shortcut for a multi-panel layout: arrange subsequent plots on an
# x-by-y grid, forwarding any further graphics parameters (lwd, cex,
# ...) straight to par().  Returns par()'s invisible list of the
# previous settings, so the caller could restore them later.
multiplot <- function(x, y, ...) {
  panel_grid <- c(x, y)
  par(mfrow = panel_grid, ...)
}
# 2x3 panel grid: one panel per predictor effect.
multiplot(2,3, lwd=2, cex=1.2)
# Predicted effects on the logit scale ...
plot(wo.lr, adj.subtitle=F)
# ... and transformed to the probability scale via plogis (inverse logit).
plot(wo.lr, adj.subtitle=F, fun=plogis, ylim=c(.1,.6))


##---------------------------------------------
# adding an interaction ... just as for ols(), lm(), etc.
##---------------------------------------------
# Let CaseMarking interact with each of the three factors
# (the * expands to main effects plus all two-way interactions).
wo.case.lr <- lrm(WordOrder ~ CaseMarking * (AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject))
wo.case.lr

# anova() creates separate summaries for interactions
# non-linearities, etc. ... just as for ols()

##--------------------------------------------
# how could we test the predictions of harmonic
# alignment/obviation theories (word order should
# be affected, if the patient OUTRANKS the agent
# in terms of saliency, e.g. animacy)
##--------------------------------------------
# First option: a full animacy-by-animacy interaction.
wo.int.lr <- lrm(WordOrder ~ AnimacyOfSubject * AnimacyOfObject + OvertnessOfObject)
wo.int.lr
anova(wo.int.lr)

# Second option: a single indicator for "object outranks subject in
# animacy" (inanimate agent, animate patient).  as.numeric() on the
# logical condition is the idiomatic replacement for ifelse(cond, 1, 0)
# and yields exactly the same 0/1 (and NA) values.
OSoutrank <- as.numeric(AnimacyOfSubject == "inanimate" & AnimacyOfObject != "inanimate")
wo.i.lr <- lrm(WordOrder ~ OSoutrank + OvertnessOfObject)
wo.i.lr

# what would you decide? 
# 1) pro "animacy of object only matters"
# 2) pro "harmonic alignment theories"


##--------------------------------------------
# using lmer() to run multilevel logit models
# (logistic regression w/ random effects)
##--------------------------------------------

# specify the family to be "binomial"
# NOTE(review): family= on lmer() is the old lme4 API; in current
# lme4 this model is fit with glmer(..., family = binomial).
# (1 | Speaker) adds a by-speaker random intercept.
wo.lmer <- lmer(WordOrder ~ AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject +
	(1 | Speaker)
	, family="binomial")
wo.lmer

# NB: changes in what is significant!!!
# 
# the variance associated with speaker is large
# compared to the fixed coefficients.

# NB: anova does NOT work (for a good reason)
# mixed logit models are fit using quasi-likelihoods
# - so, strictly speaking, the chisq test is not 
# appropriate (neither the F-test).
anova(wo.lmer, test="Chisq")

# Same model plus AgeGroup (child vs adult) as a fixed effect.
wo.age.lmer <- lmer(WordOrder ~ (AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject)
	+ AgeGroup + (1 | Speaker)
	, family="binomial")
wo.age.lmer

## Let's capture some more levels of random variance we're 
#  not per se interested in
##
# Add random intercepts for Sentence and Text on top of Speaker.
wo.ran.lmer <- lmer(WordOrder ~ (AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject)
	+ AgeGroup + (1 | Speaker) + (1 | Sentence) + (1 | Text)
	, family="binomial")
wo.ran.lmer

# NB: fixed effect correlations can change when more 
#     random effects are included (that's b/c these 
#     are partial correlations, conditioned on every-
#     thing else in the model

# NB: the Text and Sentence effects are identical?!?
#     and very very small.
#     ... but there are not the same variable 
# Cross-tabulate the two grouping factors to see how/whether
# Sentence is nested within (or confounded with) Text.
table(Sentence, Text)

# what's going on?
# LR hasn't changed between the model w/ only 
# random subject effects and the model w/ the 
# additional random effects.

##--------------------------------------------
# does sentence complexity matter?
##--------------------------------------------
# NOTE(review): this strips all lowercase letters and counts the
# characters that remain.  That only approximates "length in words"
# if each word in the Sentence strings carries exactly one
# non-lowercase character (e.g. an initial capital) -- verify
# against the actual coding of Sentence before trusting the label
# "Sentence length (in words)" used in the plot below.
Length <- nchar(gsub("[a-z]", "", Sentence), type="chars")

# Mixed logit as before, with Length added as a covariate.
n.lmer <- lmer(WordOrder ~ AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject
	+ Length + AgeGroup + (1 | Speaker)
	, family="binomial")
n.lmer

##--------------------------------------------
# plotting (based on G&H07)
##--------------------------------------------
library(arm)

# Helper (after Gelman & Hill): spread a 0/1 vector into thin bands
# just above 0 and just below 1, so overlapping points are visible in
# a scatterplot.  Band width is controlled by `jitter`.
jitter.binary <- function(binvar, jitter=.05) {
	n <- length(binvar)
	near_zero <- runif(n, 0, jitter)      # candidate values for the 0s
	near_one  <- runif(n, 1 - jitter, 1)  # candidate values for the 1s
	ifelse(binvar == 0, near_zero, near_one)
}

# general structure of plotting
# plot.binary <- function(binvar, predictor, ...) {
#	plot(jitter(predictor), jitter.binary(binvar), pch=".", cex=2, ...)
#}

# Observed data: jittered 0/1 word-order outcomes against jittered length.
plot(jitter(Length), 
	jitter.binary(ifelse(WordOrder == "subInitial", 1, 0)), 
	pch=".", cex=2, cex.lab=1.2, cex.main=1.5,
	xlab= "Sentence length (in words)",
	ylab= "Predicted probability",
	main= "Probability of subject-initial word order")
# Fitted curves as a function of Length, other predictors held fixed.
# NOTE(review): the indicator vectors assume the fixef() order
# (Intercept, AnimacyOfSubject, AnimacyOfObject, OvertnessOfObject,
# Length, AgeGroup), with fixef(n.lmer)[5] being the Length slope --
# confirm against the printed model summary.
# Curve 1: inanimate subject (AnimacyOfSubject indicator = 0).
curve(invlogit( sum(fixef(n.lmer) * c(1,0,1,1,0,0)) + fixef(n.lmer)[5] * x), 
	add=T, lwd=1.5, col="darkgray")
# Curve 2: animate subject (indicator = 1), dashed.
curve(invlogit( sum(fixef(n.lmer) * c(1,1,1,1,0,0)) + fixef(n.lmer)[5] * x), 
	add=T, lty=2, lwd=1.5)
# NOTE(review): locator(1) waits for a mouse click to place the legend,
# so this line only works in an interactive session.
legend(locator(1), legend=c('inanimate', 'animate'), 
	title="Subject animacy", lty=c(1,2), col=c('darkgrey','black'), 
	lwd=1.5, cex=1.1)

##--------------------------------------------
# visualization
##--------------------------------------------

# Scatter of the two factors coerced to their integer codes.
plot(as.numeric(WordOrder), as.numeric(AnimacyOfSubject))

# The original call here,
#   curve(c(1,1,0,1,x,1) * fixef(n.lmer))
# was broken: it built a vector of length 5 + length(x), recycled
# fixef(n.lmer) against it elementwise, and never collapsed the
# products into a single linear predictor (nor applied the inverse
# link).  Mirroring the working curves drawn above -- indicator
# pattern c(1, 1, 0, 1, -, 1) with the Length slope fixef(n.lmer)[5]
# applied to x -- the intended fitted probability curve is:
curve(plogis(sum(fixef(n.lmer) * c(1, 1, 0, 1, 0, 1)) +
		fixef(n.lmer)[5] * x),
	from = min(Length), to = max(Length),
	xlab = "Sentence length", ylab = "Fitted probability")
