Attachment 'APS-hw2.R'
Download 1 # Gelman, exercise 4.4 (p. 76)
2
3 library(foreign)
4 library(Design)
5 pollution.dd=datadist(pollution)
6 options(datadist='pollution.dd')
7
8 pollution<-read.dta("~/Desktop/pollution.dta.txt")
9 names(pollution)
10 str(pollution)
11
12
13 # 4.4a
14
15 plot(pollution$nox,pollution$mort,xlab="Relative nitric oxide pollution potential",ylab="Total age-adjusted mortality rate per 100,000")
16
17 # linear regression would probably not fit these data well; there are some outliers with very high NOX rates
18
19 p1 <- ols(mort ~ nox, data=pollution)
20 p1
21 abline(p1)
22
23 # terrible fit! the outliers screw up the regression
24 # interestingly, R^2 is negative!
25
26 plot(density(p1$residuals))
27 abline(v=0)
28
29 # the distribution of the residuals appears somewhat asymmetric, and there's a nasty lil' bump in the right tail--the outliers
30
31
32 # 4.4b
33
34 plot(density(pollution$nox))
35 plot(density(log(pollution$nox)))
36
37 p2 <- ols(mort ~ log(nox), data=pollution)
38 p2
39
40 plot(log(pollution$nox),pollution$mort,xlab="Log relative nitric oxide pollution potential",ylab="Total age-adjusted mortality rate per 100,000")
41 abline(p2)
42
43 # this looks better than the previous model
44
45 plot(density(p2$residuals))
46 abline(v=0)
47
48 # the residuals look more normally distributed than for model p1, but now there's a 'lil bump in the left tail. in fact, I'm not really sure how much the log transformation improved things for the residuals.
49
50
51 # 4.4c
52
53 p2
54
55 # there is a significant effect of relative nitric oxide pollution potential on mortality rate. for each one unit increase in log nitric oxide pollution potential, there is an expected increase in mortality rate of 15.3.
56
57
58 # 4.4d
59
60 plot(density(pollution$hc))
61 plot(density(pollution$so2))
62
63 # both need to be log transformed
64
65 p3 <- ols(mort ~ log(nox) + log(so2) + log (hc), data=pollution)
66 p3
67 plot(p3)
68
69 # this is not showing the whole model, hm ...
70
71 # interpretation of coefficients:
72 # for each one unit increase in log(nox), mortality rate is expected to increase by 58.3
73 # for each one unit increase in log(so2), mortality rate is expected to increase by 11.8
74 # for each one unit increase in log(hc), mortallity rate is expected to decrease by 57.3
75
76
77 # 4.4.e
78
79 p.firsthalf=pollution[1:30,]
80 p.secondhalf=pollution[31:60,]
81
82 p4 <- ols(mort ~ log(nox) + log(so2) + log (hc), data=p.firsthalf)
83 p4
84
85 p.secondhalf.predict<-predict(p4,p.secondhalf)
86 plot(density(p.secondhalf.predict-p.secondhalf$mort))
87
88 # there is a nasty outlier hump in the left field. this may be because there are outliers in the second half of the data
89
90
91 # Baayen, exercise 1 (p. 260)
92
93 # first, recreate their "english2" dataset
94
95 items=english[english$AgeSubject=="young",]
96 items.pca=prcomp(items[,c(18:27)],center=T,scale=T)
97 summary(items.pca)
98 sum((items.pca$sdev^2/sum(items.pca$sdev^2))[1:4])
99 x=as.data.frame(items.pca$rotation[,1:4])
100 x[order(x$PC4), ]
101
102 items$PC1=items.pca$x[,1]
103 items$PC2=items.pca$x[,2]
104 items$PC3=items.pca$x[,3]
105 items$PC4=items.pca$x[,4]
106 items2=english[english$AgeSubject!="young",]
107 items2$PC1=items.pca$x[,1]
108 items2$PC2=items.pca$x[,2]
109 items2$PC3=items.pca$x[,3]
110 items2$PC4=items.pca$x[,4]
111 english2=rbind(items,items2)
112
113 str(english2)
114
115 # add data distribution object
116 library(Design)
117 english2.dd=datadist(english2)
118 options(datadist='english2.dd')
119
120 # THIS DOESN'T WORK. NOT SURE WHY!
121 # english2.ols <- ols(RTlexdec~AgeSubject+rcs(WrittenFrequency,3)+PC1+AgeSubject*WrittenFrequency,data=english2)
122
123
124 # Baayen, exercise 8 (p. 262)
125
126 imaging
127 imaging.lm <- ols(FilteredSignal ~ BehavioralScore + Condition + BehavioralScore:Condition,data=imaging)
128 imaging.lm
129
130 # at the surface, it looks like there is a significant interaction of BehavioralScore by Condition, with a coefficient of 0.78
131
132 # however ...
133
134 plot(density(imaging.lm$residuals))
135
136 # but this is no good! the residuals are not normally distributed
137
138 min(predict(imaging.lm,imaging)-imaging$FilteredSignal)
139 max(predict(imaging.lm,imaging)-imaging$FilteredSignal)
140
141 # this is the outlier score
142
143 imaging[19,]
144
145 # let's try the model again with the outliers removed
146
147 imaging.2 <- subset(imaging,imaging$FilteredSignal>50)
148 imaging.lm2 <- ols(FilteredSignal ~ BehavioralScore + Condition + BehavioralScore:Condition,data=imaging.2)
149 imaging.lm2
150
151 # ouch. no interaction after all!
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.