# Nonlinear Regression Lecture
# 2/20/2011

# Ting Qian

# Preliminary materials
# Introduction parts

# Simulation 1
logigrowth = function (t) {
  phi1 = 2
  phi2 = 3
  phi3 = 4
  phi1 / (1 + exp(-(t-phi2)) / phi3)
}

sim.t = seq(-7, 7, by=0.1)
sim.response = logigrowth(sim.t) + rnorm(length(sim.t), 0, 0.3)
plot(sim.t, sim.response)

# use fifth-order polynormial
library(Design)
sim.pol.fit <- lm(sim.response ~ pol(sim.t, 5))
summary(sim.pol.fit)

# use a nls fit
sim.nls.fit <- nls(sim.response ~ phi1 / (1 + exp(-(sim.t - phi2)) / 1),
                   trace=TRUE)
summary(sim.nls.fit)

# summarize the results graphically
png("logit-vs-poly.png")
par(mar=c(4,4,1,1))
plot(sim.t, sim.response, cex.lab=1.2, cex.axis=1.5, cex=1.5)
lines(sim.t, predict(sim.pol.fit, data.frame(sim.t)), col="darkgreen", lwd=2)
lines(sim.t, predict(sim.nls.fit, sim.t), col="blue", lwd=2)
lines(sim.t, logigrowth(sim.t), lty=2, lwd=2, col="red")
legend("topleft", c("5th Polynomial", "Nonlinear", "Actual"), col=c("darkgreen", "blue", "red"), lty=c(1,1,2), lwd=2)
dev.off()

###########################

# Nonlinear modeling in action
infodata <- read.table("discourse-info.tab", sep="\t", header=T)
# load the nonlinear mixed model library
library(nlme)

# Model 1: General exponential growth fitted directly to the data
exp.growth <- nlme(perWordInfo ~ YMax * (1 - exp(-1 * Rate * (SId - 1))) + Baseline,
                   data = subset(infodata, LangId=="Mandarin Chinese"),
                   fixed = YMax + Baseline + Rate ~ 1,
                   random = YMax + Baseline + 1 ~ 1 | DocId,
                   start = c(YMax = 2, Baseline = 5, Rate = 1))

# Model 2: Nonlinear model derived from the assumption that the weight of a contextual cue decays exponentially
cue.exp.decay <-nlme(perWordInfo ~ (CInfo / (exp(Rate) - 1)) * (1 - exp(Rate * (1 - SId))) + CInfo,
                     data = subset(infodata, LangId=="Mandarin Chinese"),
                     fixed = CInfo + Rate  ~ 1,
                     random = CInfo ~ 1 | DocId,
                     start = c(CInfo = 4, Rate = 0.5))

# compare Model 1 with Model 2
anova(exp.growth, cue.exp.decay)

# plot the population-level (fixed) effects
plot(seq(1,15), predict(exp.growth, data.frame(SId=seq(1,15), DocId=rep(2,15)), level=0), type="l")
lines(seq(1,15), predict(cue.exp.decay, data.frame(SId=seq(1,15), DocId=rep(2,15)), level=0), lwd=2, col="red")

# Power law
pow.growth <-nlme(perWordInfo ~ YMax * (1 / SId ^ Rate) + Baseline,
                  data = subset(infodata, LangId=="Mandarin Chinese"),
                  fixed = YMax + Rate + Baseline ~ 1,
                  random = YMax + Baseline ~ 1 | DocId,
                  start = c(YMax = 6, Rate = 2, Baseline = 1))

cue.pow.decay <- nlme(perWordInfo ~ CInfo * (SId ^ (1 - Rate) / (1 - Rate) - 1 / (1 - Rate)) + CInfo,
                      data = subset(infodata, LangId=="Mandarin Chinese"),
                      fixed = CInfo + Rate ~ 1,
                      random = CInfo ~ 1 | DocId,
                      start = c(CInfo = 6, Rate = 2))

# compare Model 1 with Model 2
anova(pow.growth, cue.pow.decay)

# plot the population-level (fixed) effects
with(subset(infodata, LangId=="Mandarin Chinese"),
     plot(perWordInfo ~ SId))
lines(seq(1,15), predict(pow.growth, data.frame(SId=seq(1,15), DocId=rep(2,15)), level=0), type="l")
lines(seq(1,15), predict(cue.pow.decay, data.frame(SId=seq(1,15), DocId=rep(2,15)), level=0), lwd=2, col="red")

# What if we use double log-transformation to model the effect of a power law
library(lme4)
pow.a.lmer <- lmer(log(perWordInfo) ~ (1|DocId) + log(SId), data = subset(infodata, LangId=="Mandarin Chinese"), REML=F)

# Quick and Dirty nonlinear modeling: nls
pow.nls <- nls(perWordInfo ~ CInfo * (SId ^ (1 - Rate) / (1 - Rate) - 1 / (1 - Rate)) + CInfo,
               data = subset(infodata, LangId=="Mandarin Chinese"),
               start = list(CInfo = 6, Rate = 2))
# add this line to the previous plot
lines(seq(1,15), predict(pow.nls, data.frame(SId=seq(1,15)), type="l", col="green", lwd=2, lty=2))
plot(pow.nls)

# Can we model all languages at the same time?
all.cue.pow.decay <- nlme(perWordInfo ~ CInfo * (SId ^ (1 - Rate) / (1 - Rate) - 1 / (1 - Rate)) + CInfo,
                          data = subset(infodata),
                          fixed = CInfo + Rate ~ 1,
                          random = CInfo ~ 1 | LangId/DocId,
                          start = c(CInfo = 6, Rate = 2))

all.cue.exp.decay <-nlme(perWordInfo ~ (CInfo / (exp(Rate) - 1)) * (1 - exp(Rate * (1 - SId))) + CInfo,
                         data = subset(infodata),
                         fixed = CInfo + Rate  ~ 1,
                         random = CInfo ~ 1 | LangId/DocId,
                         start = c(CInfo = 4, Rate = 0.5))

anova(all.cue.exp.decay, all.cue.pow.decay)

# random hacks don't work well
hack1.cue.exp.decay <-nlme(perWordInfo ~ (CInfo / (exp(Rate) - 1)) * (1 - exp(Rate * (1 - SId))) + CInfo + SId,
                           data = subset(infodata, LangId=="Mandarin Chinese"),
                           fixed = CInfo + Rate  ~ 1,
                           random = CInfo ~ 1 | DocId,
                           start = c(CInfo = 4, Rate = 0.5))


### If there is still time and demand:

## Topic 1: self-starting functions ###
ssasymp.growth <- nlme(perWordInfo ~ SSasympOrig(SId, Asym, lrc),
                       data = subset(infodata, LangId=="Mandarin Chinese"),
                       fixed = Asym + lrc ~ 1,
                       random = Asym + lrc ~ 1 | DocId,
                       start = c(Asym = 8, lrc=2))

lines(seq(1,15), predict(ssasymp.growth, data.frame(SId=seq(1,15)), level=0), type="l", col="green", lwd=2)

ssasymp.growth <- nls(perWordInfo ~ SSasympOrig(SId, Asym, R0),
                       data = subset(infodata, LangId=="Mandarin Chinese"))

## Topic 2: nnet
library(nnet)
nnet.growth.1 <- nnet(perWordInfo ~ SId + DocId, data = subset(infodata, LangId=="Mandarin Chinese"),
                      size = 10, maxit = 2000, decay = 1e-3, skip = T, linout = T)

pred.matrix = matrix(predict(nnet.growth.1),15,length(predict(nnet.growth.1))/15)
average.pred = rowMeans(pred.matrix)
plot(seq(1,15), average.pred, type="l")
persp(seq(1,15), seq(1,10), pred.matrix, theta=320, phi=20, r =5, xlab="sentence position", ylab="docid")

nnet.growth.2 <- nnet(perWordInfo ~ SId + DocId + OOVCount, data = subset(infodata, LangId=="Mandarin Chinese"),
                      size = 25, maxit = 1000, decay = 1e-3, skip = T, linout = T)
pred.matrix = matrix(predict(nnet.growth.2),15,length(predict(nnet.growth.2))/15)
average.pred = rowMeans(pred.matrix)
plot(seq(1,15), average.pred, type="l")
persp(seq(1,15), seq(1,10), pred.matrix, theta=320, phi=20, r =5, xlab="sentence position", ylab="docid")