Attachment 'VanDurmeSession2.R'
Download 1 ## Benjamin Van Durme, vandurme@cs.rochester.edu, 4 Jun 2008
2 ## Time-stamp: <2008-06-06 15:19:16 vandurme>
3
4 ## setup environment
5 library(languageR)
6 library(Design)
7
8 ########################################
9 ## Baayen 6.1
10
## Load the `english` data set (expected to come from languageR, which is
## attached at the top of this script).
data(english)

## Create english2 as described in the text: every row of `english` plus its
## scores on the first four principal components of the predictors stored in
## columns 18:27.
pc.cols <- 18:27

## The PCA is fitted on the young-subject rows only.
items <- english[english$AgeSubject == "young", ]
items.pca <- prcomp(items[, pc.cols], center = TRUE, scale. = TRUE)

## Loadings of the first four components, kept around for inspection.
x <- as.data.frame(items.pca$rotation[, 1:4])

## Scores for the rows the PCA was fitted on (columns are named PC1..PC4).
items <- cbind(items, items.pca$x[, 1:4])

## Scores for the remaining (old-subject) rows.  Rather than copying the
## young-subject scores by position -- which is only right if both subsets
## list the words in exactly the same order -- project each row onto the
## fitted components using its own predictor values.  When the rows do line
## up and carry the same predictor values, this gives the same scores.
items2 <- english[english$AgeSubject != "young", ]
items2 <- cbind(items2, predict(items.pca, newdata = items2[, pc.cols])[, 1:4])

english2 <- rbind(items, items2)
28
29 # english2$NVratio = log(english2$NounFrequency+1) - log(english2$VerbFrequency+1)
30
## Register the distribution summary of english2 and point the `datadist`
## option at it, so that Design knows the variable ranges to use in plots.

english2.dd <- datadist(english2)
options(datadist = "english2.dd")

## First look: lexical decision RT against WrittenFrequency.

plot(x = english2$RTlexdec, y = english2$WrittenFrequency)

## It looks like there are two distributions there, perhaps old vs. young?
## Draw the old subjects first (default colour), then overlay the young
## subjects in blue on the same axes.

with(
  subset(english2, AgeSubject == "old"),
  plot(WrittenFrequency, RTlexdec, ylim = c(6, 7.5))
)
with(
  subset(english2, AgeSubject == "young"),
  points(WrittenFrequency, RTlexdec, col = "blue")
)
44
45
## Fit the models asked for in the instructions: age group, written
## frequency and the first principal component as additive predictors of
## the lexical decision and the naming latencies.

model1.0 <- ols(
  RTlexdec ~ AgeSubject + WrittenFrequency + PC1,
  data = english2
)
model2 <- ols(
  RTnaming ~ AgeSubject + WrittenFrequency + PC1,
  data = english2
)

## Add the interaction between WrittenFrequency and AgeSubject.
model1.1 <- ols(
  RTlexdec ~ AgeSubject + WrittenFrequency + PC1 +
    WrittenFrequency * AgeSubject,
  data = english2
)

## Show the plots for this model on a 2x2 grid, then go back to one panel.
par(mfrow = c(2, 2))
plot(model1.1)
par(mfrow = c(1, 1))

## Replace the linear WrittenFrequency term with a restricted cubic spline
## (3 knots); both it and PC1 interact with AgeSubject.
model1.2 <- ols(
  RTlexdec ~ AgeSubject * (rcs(WrittenFrequency, 3) + PC1),
  data = english2
)

par(mfrow = c(2, 2))
plot(model1.2)
par(mfrow = c(1, 1))

## PC1 is not significant in the model above -- why?  The assignment
## suggests a 3-knot restricted cubic spline to allow it to be non-linear.

model1.3 <- ols(
  RTlexdec ~ AgeSubject * (rcs(WrittenFrequency, 3) + rcs(PC1, 3)),
  data = english2
)

par(mfrow = c(2, 2))
plot(model1.3)
par(mfrow = c(1, 1))
73
74
75 ## As compared to model1.2, the fit using an rcs on PC1 is better, making PC1 a
76 ## more significant predictor. Based only on help(english), I'm not clear what
77 ## the orthographic variables were that we performed PCA on, so I have little
78 ## intuition on why PC1 provides for a better fit when allowed to be non-linear.
79
80
81 ############################################################
82 ## Baayen 6.8
83
## Load the imaging data and print its structure.
data(imaging)
str(imaging)

## Switch the active datadist over to the imaging data for Design's plots.
imaging.dd <- datadist(imaging)
options(datadist = "imaging.dd")

## Raw data: FilteredSignal against BehavioralScore, y axis fixed to 0-100.
plot(
  x = imaging$BehavioralScore,
  y = imaging$FilteredSignal,
  ylim = c(0, 100)
)

## Regress FilteredSignal on BehavioralScore and plot the fitted model.
model6.8.1 <- ols(FilteredSignal ~ BehavioralScore, data = imaging)
plot(model6.8.1)
94
95 ## The above plot is worrisome; the confidence intervals are wide enough to
96 ## allow for a non-increasing mapping between behavioralscore and
97 ## filteredsignal.
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.