## -----------------
# load libraries
## -----------------

library(languageR)
library(ggplot2)
library(Design)

## -----------------
# create data set - set parameters
## -----------------

# number of conditions (cells of the 2 x 2 design)
ncond = 4

# number of subjects (a multiple of ncond)
nsubj = ncond * 4

# number of items per subject (a multiple of ncond)
nitem = ncond * 6

# length of critical time window in seconds
nsec = 0.4

# sampling resolution: seconds per sample, and the implied samples per second
nsecpersamp = 0.004
nsamppersec = 1 / nsecpersamp

# binning: seconds per bin, and the implied samples per bin
nsecperbin = 0.04
nsampperbin = nsecperbin / nsecpersamp

# noise parameters for the by-subject and by-item random intercepts
# (these are passed through sqrt() where the noise is drawn, so they act
# as variances rather than standard deviations)
sigma_subj_int = 0.5
sigma_item_int = 0.15
# could add extra noise, beyond binomial
# as I understand, this would be a way to
# introduce overdispersion
# sigma = .65

# fixed effects of the design factors (log-odds of looks to target)
intercept = 0.3
beta_condA = 0.7
beta_condB = -1.5
beta_condAB = -0.3

# time curve: linear (time) and quadratic (time2) terms and their
# interactions with the design factors; all quadratic interactions are 0
beta_time = 0.5
beta_time2 = -1.5
beta_condAtime = 0.4
beta_condAtime2 = 0
beta_condBtime = 0.1
beta_condBtime2 = 0
beta_condABtime = -0.8
beta_condABtime2 = 0

# coefficients for an EarlyTargetLook predictor and its interactions with
# the design factors and time; all set to 0 (no such effects), and not
# referenced by simpleformula() below
beta_EarlyTargetLook_condA = 0
beta_EarlyTargetLook_condB = 0
beta_EarlyTargetLook_condAB = 0
beta_EarlyTargetLook_time = 0
beta_EarlyTargetLook_time2 = 0
beta_EarlyTargetLook_condAtime = 0
beta_EarlyTargetLook_condAtime2 = 0
beta_EarlyTargetLook_condBtime = 0
beta_EarlyTargetLook_condBtime2 = 0
beta_EarlyTargetLook_condABtime = 0
beta_EarlyTargetLook_condABtime2 = 0

# data loss by subject: mean loss rate (3%) and its by-subject spread,
# both expressed on the log-odds scale
mean_loss_per_subject = qlogis(0.03)
sigma_loss_per_subject = abs(mean_loss_per_subject / 3)

## -----------------
# functions
## -----------------
# Center a numeric vector on its mean.
#   x: numeric vector; NA values are ignored when computing the mean and
#      stay NA in the result.
# Returns a vector of the same length as x with mean(x) subtracted.
# Uses TRUE rather than T: T is an ordinary variable that can be reassigned,
# which would silently switch off the NA handling.
myCenter = function(x) x - mean(x, na.rm = TRUE)

# Simulate binary looks to target for every row of d.
#   d: data frame with columns eps_subj_int, eps_item_int,
#      cCondWordFrequencyHigh, cCondCompetitionHigh and cTime.
# Reads the coefficients (intercept, beta_*) from the enclosing environment.
# Returns d with two added columns:
#   y             - linear predictor (log-odds of a look to target)
#   LooksToTarget - factor holding one Bernoulli draw per row with
#                   success probability plogis(y)
simpleformula = function(d) {
	d$y = intercept + d$eps_subj_int + d$eps_item_int + 
		beta_condA * d$cCondWordFrequencyHigh +
		beta_condB * d$cCondCompetitionHigh +
		beta_condAB * d$cCondWordFrequencyHigh * d$cCondCompetitionHigh +
		beta_time * d$cTime +
		beta_condAtime * d$cCondWordFrequencyHigh * d$cTime +
		beta_condBtime * d$cCondCompetitionHigh * d$cTime +
		beta_condABtime * d$cCondWordFrequencyHigh * d$cCondCompetitionHigh * d$cTime +
		beta_time2 * d$cTime^2 +
		beta_condAtime2 * d$cCondWordFrequencyHigh * d$cTime^2 +
		beta_condBtime2 * d$cCondCompetitionHigh * d$cTime^2 +
		beta_condABtime2 * d$cCondWordFrequencyHigh * d$cCondCompetitionHigh * d$cTime^2 
	# rbinom() is vectorized over prob, so a single call draws one value per
	# row (in row order) instead of one R-level function call per sample;
	# it also returns a plain integer vector for zero-row input
	p_target = plogis(d$y)
	d$LooksToTarget = as.factor(rbinom(n = length(p_target), size = 1, prob = p_target))
	return(d)
}

## -----------------
# set up data frame with Latin Square design
## -----------------
# Number of samples per trial. nsec * nsamppersec is a floating-point
# product; rep() and `:` silently truncate non-integer counts, so round it
# to an integer once and use that everywhere below.
nsamp = as.integer(round(nsec * nsamppersec))

# Integer subject / item ids for every row: samples are nested within
# items, items within subjects.
subj_id = rep(seq_len(nsubj), each = nitem * nsamp)
item_id = rep(rep(seq_len(nitem), each = nsamp), times = nsubj)

d = data.frame(
	Subj = as.factor(subj_id),
	Item = as.factor(item_id),
	Sample = rep(seq_len(nsamp), times = nitem * nsubj)
)
# Latin Square: the condition of an item rotates with the subject number
d$Cond = as.factor(((subj_id - 1) + item_id) %% ncond + 1)
# time of each sample in seconds, its centered version, and its bin index
d$Time = d$Sample * nsecpersamp
d$cTime = myCenter(d$Time)
d$Bin = ceiling(d$Sample / nsampperbin)

# defining 2 x 2 design of CondA * CondB
# (CondA = word frequency, CondB = number of competitors), each factor
# sum-coded as +/- .5
d$CondWordFrequency = ifelse(d$Cond %in% c(1,2), "high", "low")
d$cCondWordFrequencyHigh = ifelse(d$CondWordFrequency == "high", .5, -.5)
d$CondCompetitors = ifelse(d$Cond %in% c(1,3), "one", "two")
d$cCondCompetitionHigh = ifelse(d$CondCompetitors == "two", .5, -.5)

# check design
with(d, table(Subj, Item))
with(d, table(Item, Cond))
with(d, table(Item, Cond, Subj))
with(d, table(Bin))

## -----------------
# set up data
## -----------------
# normally distributed error by subject and item intercept 
# plus residual error
# One random intercept per subject and per item, expanded to the rows of d
# by indexing with the Subj / Item factors (i.e. their integer codes).
# NOTE(review): the sigma_* parameters are passed through sqrt(), so they
# are treated as variances here - confirm that this is intended.
d$eps_subj_int = rnorm(nsubj, 0, sqrt(sigma_subj_int))[d$Subj]
d$eps_item_int = rnorm(nitem, 0, sqrt(sigma_item_int))[d$Item]
# d$eps = rnorm(nrow(d), 0, sqrt(sigma))

## could add random data loss or loss by subject (with different rates of 
## loss for different subjects) 

# add the linear predictor (y) and the simulated looks (LooksToTarget)
d = simpleformula(d)
d$Subj = as.factor(d$Subj)
d$Item = as.factor(d$Item)

# NOTE(review): machine-specific absolute path; setwd() fails on any other
# machine and changes the working directory for the rest of the session.
setwd("C:\\Users\\tiflo\\Documents\\My LabMeetings, Tutorials, & Teaching\\Regression Workshops\\Iowa11\\scripts")
# complete data set, including the simulation internals (y, eps_*)
write.csv(d, file = "eye-tracking-sample-complete.csv", quote = FALSE, row.names = FALSE)

# remove data that is "lost": every subject gets its own loss rate (drawn
# on the log-odds scale), then each sample is dropped independently with
# that subject's probability. A single vectorized rbinom() call draws one
# value per row in row order.
loss_prob = plogis(rnorm(nsubj, mean_loss_per_subject, sqrt(sigma_loss_per_subject))[d$Subj])
lost = rbinom(n = length(loss_prob), size = 1, prob = loss_prob)
d = d[lost != 1, , drop = FALSE]

# write the reduced data set without the simulation internals. A logical
# mask is used instead of -which(...): if none of the names matched,
# -which(...) would be an empty index and drop ALL columns.
drop_cols = c('Cond','y','Exclude','eps_subj_int','eps_item_int')
write.csv(d[, !(names(d) %in% drop_cols), drop = FALSE], file = "eye-tracking-sample.csv", quote = FALSE, row.names = FALSE)

