library(languageR)

## ---------------------------------------------------
# Lexical decision time data
#
# Q: How can we construct a good (or even the best) frequency
#    estimate from a variety of sources?
## ---------------------------------------------------
# Load the lexical decision data set and inspect its structure.
data(lexdec)
str(lexdec)

# Collapse the data to one row per word by averaging the four frequency
# measures within each word.
# NOTE(review): presumably each measure is constant within a word, so the
# mean simply de-duplicates the rows -- confirm against the data.
l <- aggregate(
  lexdec[c("Frequency", "BNCw", "BNCc", "BNCd")],
  by = list(Word = lexdec[["Word"]]),
  FUN = mean
)

# Use the words as row labels (they become the point labels in the biplots
# further down) and drop the grouping column so only numeric columns remain.
rownames(l) <- as.character(l[["Word"]])
l[["Word"]] <- NULL

# Principal component analysis of the per-word frequency measures.
# princomp() has no `center` argument (that one belongs to prcomp()): it
# always centres the variables, and an extra `center = ...` is discarded
# (with a warning in current R versions), so it is not passed here.
# NOTE(review): with the default cor = FALSE the components are computed from
# the covariance matrix; confirm the measures are on comparable scales.
p <- princomp(l)

# How much variance does each principal component account for?
summary(p)
plot(
  p,
  main = "Screeplot of frequency components",
  xlab = "Components",
  cex.lab = 1.2
)

# How do the original measures relate to the components?
# First biplot: observations in dark gray, variables in black.
biplot(p, col = c("darkgray", "black"))
# Second biplot: scale = 0 plots the raw scores and loadings.
biplot(p, scale = 0)