# predictor 1, binary
x1 <- rep(c("a","b"),160)
x1

# predictor 2, ternary
x2 <- c(rep("c",40), rep("d", 40), rep("e", 40), rep("f", 40))
x2

# effects for x2 (let's assume x1 has no effect)
e <- c(rep(0,20),rep(-1.5,20), rep(0.5, 20), rep(0.5,20))

# random noise
n <- rnorm(160,0,1.2)
mean(n)

# y is effect + noise
y <- e + n

d <- as.data.frame(cbind(x1,x2,y))
d
d$y <- as.numeric(as.character(d$y))

d$x1b = ifelse(d$x1 == "b", 1, 0)
d$x2d = ifelse(d$x2 == "d", 1, 0)
d$x2e = ifelse(d$x2 == "e", 1, 0)
d$x2f = ifelse(d$x2 == "f", 1, 0)
with(d, cor(x2d, x2e))
with(d, cor(x2e, x2f))
with(d, cor(x2d, x2f))

# r2 same as correlation squared as long as pairs of predictors are used
summary(lm(x2d ~ x2e, d)) 
summary(lm(x2d ~ x2f, d)) 
summary(lm(x2e ~ x2f, d)) 
summary(lm(x2d ~ x2e + x2f, d)) 

# run linear regression
library(Design)
dd = datadist(d)
options(datadist='dd')
l <- ols(y ~ x1 + x2, d)
l
vif(l)
summary(l)

# R implicitly coded x1 and x2 as dummy/treatment coded

# look at l
# x1=b should have no effect (we haven't given it any)
# x2=d should be about -1.5 and probably significant (differs each run)
# x2=e should be 0.5 (see above) and will sometimes be significant
l

# now look at collinearity for x1 and x2. 
# x1 should be absolutely non-collinear (balanced design)
# x2 should be somewhat collinear (1.333). this will be the case in every
#  run of this script, no matter how other things change --> 
#  it does not depend on the dependent variable, only on the coding and the
#  distribution of the predictor levels
vif(l)

# recall what vif means (see e.g. wikipedia)
# calculate R2 of predicting x2 == d given x2 == e 
# (whether x1 is also in the model does not matter <-- balanced design)
rsq <- ols(x2 == "d" ~ I(x2 == "e") + I(x1 == "b"), d)$stats["R2"]

# predicted VIF = 1 / (1-rsq) should be 1.3333
as.numeric(1 / (1 - rsq))

# e voila!

# different coding does not help
# the different levels are inherently related. 
# so knowing the values of one contrast tells
# you about the probability of values for other contrasts
contrasts(d$x2) <- contr.sum(3)
l.contr <- ols(y ~ x1 + x2, d)
l.contr
vif(l.contr)