# Load the warlpiri data set from the languageR package.
data("warlpiri", package = "languageR")

##--------------------------------------------
# This data set documents the use of ergative 
# case marking in the narratives of native speakers 
# of Lajamanu Warlpiri (8 children, 13 adults) 
# describing events in picture books. 
#
# O'Shannessy, C. (2006) Language contact and 
# child bilingual acquisition: Learning a mixed 
# language and Warlpiri in northern Australia, 
# PhD Thesis, University of Sydney, Australia. 
##--------------------------------------------

# Open the help page and show the structure of the data frame.
help("warlpiri", package = "languageR")
str(warlpiri)

# The columns are put on the search path because later sections of
# this script refer to them by their bare names.
attach(warlpiri)

## This is an unbalanced data set, so we have to
#  be especially careful to test for collinearity
#  (b/c it's basically bound to be present).
##
with(warlpiri, levels(Speaker))
with(warlpiri, table(Speaker))

# Word order and case marking are also distributed
# heterogeneously.
summary(warlpiri)

##--------------------------------------------
# understanding the data
##--------------------------------------------
# Plot word order against each candidate predictor, and once more with
# the axes swapped for object overtness. The columns are looked up
# inside `warlpiri` explicitly instead of through the search path, so
# the plots do not depend on a prior attach().
with(warlpiri, {
  plot(WordOrder, AnimacyOfSubject)
  plot(WordOrder, AnimacyOfObject)
  plot(WordOrder, OvertnessOfObject)

  plot(OvertnessOfObject, WordOrder)
})


##--------------------------------------------
# Let's have a look at the predictors 
##--------------------------------------------
library(languageR)
library(rpart)
# Grow a recursive-partitioning tree for word order on the columns
# that remain after the positional drop.
# NOTE(review): columns 1, 2, 5 and 9 are removed by position; confirm
# against str(warlpiri) which variables these are. If the response
# itself is among them, rpart() can only resolve WordOrder through the
# attach()ed copy of the data.
w.rp <- rpart(WordOrder ~ ., data = warlpiri[, -c(1, 2, 5, 9)])
w.rp

# Inspect the complexity-parameter table graphically; the pruning
# threshold below was chosen from this plot.
plotcp(w.rp)

# Prune at the chosen complexity parameter, then draw and label the
# tree (use.n = TRUE adds the observation counts to the node labels).
w.pruned <- prune(w.rp, cp = 0.021)
plot(w.pruned, margin = 0.1, compress = FALSE)
text(w.pruned, use.n = TRUE, pretty = 0, cex = 0.8, fancy = FALSE)

##--------------------------------------------
# lrm() in Design is a convenient interface for 
# logistic regression (also glm())
##--------------------------------------------
# NOTE(review): Design has since been superseded by the rms package;
# confirm that Design is still installable in the target environment.
library(Design)

# Main-effects logistic model of word order. The data frame is passed
# explicitly so the fit does not depend on attach().
wo.lr <- lrm(
  WordOrder ~ AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject,
  data = warlpiri
)

# Variance inflation factors, as a check for collinearity.
vif(wo.lr)

# non-sequential test of factor impacts
anova(wo.lr)

##---------------------------------------------
# plotting ... just like for ols
# (both are Design functions)
##---------------------------------------------
# Summarise the distribution of every variable in `warlpiri` and
# register that summary for the plotting functions. The option holds
# the *name* of the object as a string, so it must match `wo.dd`.
wo.dd <- datadist(warlpiri)
options(datadist = "wo.dd")

# To plot all predicted effects on one page
# we need to set the mfrow graphics parameter via par().
# To avoid the cumbersome syntax, let's define
# a function as a shortcut.
# Split the plotting device into a grid of panels.
#   x   : number of panel rows
#   y   : number of panel columns
#   ... : further graphics parameters, handed to par() unchanged
# Returns, invisibly, whatever par() returns for these settings.
multiplot <- function(x, y, ...) {
  previous <- par(mfrow = c(x, y), ...)
  invisible(previous)
}
# Lay out a 2 x 3 grid with thicker lines and larger text, then draw
# the predicted effects twice: first on the model's default scale,
# then passed through plogis() with a fixed y-range.
# TRUE/FALSE are spelled out because T and F can be reassigned.
multiplot(2, 3, lwd = 2, cex = 1.2)
plot(wo.lr, adj.subtitle = FALSE)
plot(wo.lr, adj.subtitle = FALSE, fun = plogis, ylim = c(0.1, 0.6))


##---------------------------------------------
# adding an interaction ... just as for ols(), lm(), etc.
##---------------------------------------------
# Let case marking interact with each of the three predictors.
# The data frame is passed explicitly so the fit does not depend on
# attach().
wo.case.lr <- lrm(
  WordOrder ~ CaseMarking *
    (AnimacyOfSubject + AnimacyOfObject + OvertnessOfObject),
  data = warlpiri
)
wo.case.lr

# anova() creates separate summaries for interactions,
# non-linearities, etc. ... just as for ols()

##--------------------------------------------
# how could we test the predictions of harmonic
# alignment/obviation theories (word order should
# be affected, if the patient OUTRANKS the agent
# in terms of saliency, e.g. animacy)
##--------------------------------------------
# Model with a subject-animacy x object-animacy interaction.
# The data frame is passed explicitly so the fit does not depend on
# attach().
wo.int.lr <- lrm(
  WordOrder ~ AnimacyOfSubject * AnimacyOfObject + OvertnessOfObject,
  data = warlpiri
)
wo.int.lr
anova(wo.int.lr)

# Indicator (1/0) for the configuration in which the object outranks
# the subject: an inanimate subject with a non-inanimate object.
OSoutrank <- with(warlpiri, ifelse(
  AnimacyOfSubject == "inanimate" & AnimacyOfObject != "inanimate",
  1,
  0
))

# OSoutrank is not a column of `warlpiri`; it is taken from the
# workspace, while the remaining variables come from `data`.
wo.i.lr <- lrm(WordOrder ~ OSoutrank + OvertnessOfObject, data = warlpiri)
wo.i.lr

# what would you decide? 
# 1) pro "animacy of object only matters"
# 2) pro "harmonic alignment theories"

