attachment:VanDurmeSession2.R of HLPMiniCourseSession2

Attachment 'VanDurmeSession2.R'

   1 ## 	Benjamin Van Durme, vandurme@cs.rochester.edu,  4 Jun 2008
   2 ## 	Time-stamp: <2008-06-06 15:19:16 vandurme>
   3 
   4 ## setup environment
   5 library(languageR)
   6 library(Design)
   7 
   8 ########################################
   9 ## Baayen 6.1
  10 
  11 data(english)
  12 
  13 ## Create english2 as according to the text
  14 
  15 items = english[english$AgeSubject=="young",]
  16 items.pca = prcomp(items[,c(18:27)],center=T,scale=T)
  17 x= as.data.frame(items.pca$rotation[,1:4])
  18 items$PC1 = items.pca$x[,1] 
  19 items$PC2 = items.pca$x[,2] 
  20 items$PC3 = items.pca$x[,3] 
  21 items$PC4 = items.pca$x[,4] 
  22 items2 = english[english$AgeSubject != "young", ] 
  23 items2$PC1 = items.pca$x[,1] 
  24 items2$PC2 = items.pca$x[,2] 
  25 items2$PC3 = items.pca$x[,3] 
  26 items2$PC4 = items.pca$x[,4] 
  27 english2 = rbind(items, items2)
  28 
  29 # english2$NVratio = log(english2$NounFrequency+1) - log(english2$VerbFrequency+1) 
  30 
  31 ## Create datadist object to inform Design of ranges for plots
  32 
  33 english2.dd = datadist(english2)
  34 options(datadist = "english2.dd")
  35 
  36 ## Look at WrittenFrequency compared to lexdec
  37 
  38 plot(english2$RTlexdec, english2$WrittenFrequency)
  39 
  40 ## It looks there are two distributions there, perhaps old vs young?
  41 
  42 with(english2[english2$AgeSubject == "old",],plot(WrittenFrequency, RTlexdec,ylim=c(6,7.5)))
  43 with(english2[english2$AgeSubject == "young",],points(WrittenFrequency, RTlexdec, col="blue"))
  44  
  45 
  46 ## Fit a model as according to the instructions
  47 
  48 model1.0 = ols( RTlexdec ~ AgeSubject + WrittenFrequency + PC1, data = english2)
  49 model2 = ols( RTnaming ~ AgeSubject + WrittenFrequency + PC1, data = english2)
  50 
  51 ## interaction between WrittenFrequency and AgeSubject
  52 model1.1 = ols( RTlexdec ~ AgeSubject + WrittenFrequency + PC1 + WrittenFrequency * AgeSubject, data = english2)
  53 
  54 par(mfrow=c(2,2))
  55 plot(model1.1)
  56 par(mfrow=c(1,1))
  57 
  58 ## add restricted cubic spline on WrittenFrequency
  59 model1.2 = ols( RTlexdec ~  AgeSubject * (rcs(WrittenFrequency, 3) + PC1), data = english2)
  60 
  61 par(mfrow=c(2,2))
  62 plot(model1.2)
  63 par(mfrow=c(1,1))
  64 
  65 ## PC1 is not significant in the above model, why? The assignment suggests a
  66 ## 3knot rcs to make it non-linear
  67 
  68 model1.3 = ols( RTlexdec ~  AgeSubject * (rcs(WrittenFrequency, 3) + rcs(PC1,3)), data = english2)
  69 
  70 par(mfrow=c(2,2))
  71 plot(model1.3)
  72 par(mfrow=c(1,1))
  73 
  74 
  75 ## As compared to model1.2, the fit using an rcs on PC1 is better, making PC1 a
  76 ## more significant predictor. Based only on help(english), I'm not clear what
  77 ## the orthographic variables were that we performed PCA on, so I have little
  78 ## intuition on why PC1 provides for a better fit when allowed to be non-linear.
  79 
  80 
  81 ############################################################
  82 ## Baayen 6.8
  83 
  84 data(imaging)
  85 str(imaging)
  86 
  87 imaging.dd = datadist(imaging)
  88 options(datadist = "imaging.dd")
  89 
  90 plot(imaging$BehavioralScore, imaging$FilteredSignal, ylim=c(0,100))
  91 
  92 model6.8.1 = ols( FilteredSignal ~ BehavioralScore, data = imaging)
  93 plot(model6.8.1)
  94 
  95 ## The above plot is worrisome; the confidence intervals are wide enough to
  96 ## allow for a non-increasing mapping between behavioralscore and
  97 ## filteredsignal.
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.