Attachment 'APS-hw2.R'

Download

   1 # Gelman, exercise 4.4 (p. 76)
   2 
   3 library(foreign)
   4 library(Design)
   5 pollution.dd=datadist(pollution)
   6 options(datadist='pollution.dd')
   7 
   8 pollution<-read.dta("~/Desktop/pollution.dta.txt")
   9 names(pollution)
  10 str(pollution)
  11 
  12 
  13 # 4.4a
  14 
  15 plot(pollution$nox,pollution$mort,xlab="Relative nitric oxide pollution potential",ylab="Total age-adjusted mortality rate per 100,000")
  16 
  17 # linear regression would probably not fit these data well; there are some outliers with very high NOX rates
  18 
  19 p1 <- ols(mort ~ nox, data=pollution)
  20 p1
  21 abline(p1)
  22 
  23 # terrible fit! the outliers screw up the regression
  24 # interestingly, R^2 is negative!
  25 
  26 plot(density(p1$residuals))
  27 abline(v=0)
  28 
  29 # the distribution of the residuals appears somewhat asymmetric, and there's a nasty lil' bump in the right tail--the outliers
  30 
  31 
  32 # 4.4b
  33 
  34 plot(density(pollution$nox))
  35 plot(density(log(pollution$nox)))
  36 
  37 p2 <- ols(mort ~ log(nox), data=pollution)
  38 p2
  39 
  40 plot(log(pollution$nox),pollution$mort,xlab="Log relative nitric oxide pollution potential",ylab="Total age-adjusted mortality rate per 100,000")
  41 abline(p2)
  42 
  43 # this looks better than the previous model
  44 
  45 plot(density(p2$residuals))
  46 abline(v=0)
  47 
  48 # the residuals look more normally distributed than for model p1, but now there's a 'lil bump in the left tail.  in fact, I'm not really sure how much the log transformation improved things for the residuals.
  49 
  50 
  51 # 4.4c
  52 
  53 p2
  54 
  55 # there is a significant effect of relative nitric oxide pollution potential on mortality rate.  for each one unit increase in log nitric oxide pollution potential, there is an expected increase in mortality rate of 15.3.
  56 
  57 
  58 # 4.4d
  59 
  60 plot(density(pollution$hc))
  61 plot(density(pollution$so2))
  62 
  63 # both need to be log transformed
  64 
  65 p3 <- ols(mort ~ log(nox) + log(so2) + log (hc), data=pollution)
  66 p3
  67 plot(p3)
  68 
  69 # this is not showing the whole model, hm ...
  70 
  71 # interpretation of coefficients:
  72 # for each one unit increase in log(nox), mortality rate is expected to increase by 58.3
  73 # for each one unit increase in log(so2), mortality rate is expected to increase by 11.8
  74 # for each one unit increase in log(hc), mortallity rate is expected to decrease by 57.3
  75 
  76 
  77 # 4.4.e
  78 
  79 p.firsthalf=pollution[1:30,]
  80 p.secondhalf=pollution[31:60,]
  81 
  82 p4 <- ols(mort ~ log(nox) + log(so2) + log (hc), data=p.firsthalf)
  83 p4
  84 
  85 p.secondhalf.predict<-predict(p4,p.secondhalf)
  86 plot(density(p.secondhalf.predict-p.secondhalf$mort))
  87 
  88 # there is a nasty outlier hump in the left field.  this may be because there are outliers in the second half of the data
  89 
  90 
  91 # Baayen, exercise 1 (p. 260)
  92 
  93 # first, recreate their "english2" dataset
  94 
  95 items=english[english$AgeSubject=="young",]
  96 items.pca=prcomp(items[,c(18:27)],center=T,scale=T)
  97 summary(items.pca)
  98 sum((items.pca$sdev^2/sum(items.pca$sdev^2))[1:4])
  99 x=as.data.frame(items.pca$rotation[,1:4])
 100 x[order(x$PC4), ]
 101 
 102 items$PC1=items.pca$x[,1]
 103 items$PC2=items.pca$x[,2]
 104 items$PC3=items.pca$x[,3]
 105 items$PC4=items.pca$x[,4]
 106 items2=english[english$AgeSubject!="young",]
 107 items2$PC1=items.pca$x[,1]
 108 items2$PC2=items.pca$x[,2]
 109 items2$PC3=items.pca$x[,3]
 110 items2$PC4=items.pca$x[,4]
 111 english2=rbind(items,items2)
 112 
 113 str(english2)
 114 
 115 # add data distribution object
 116 library(Design)
 117 english2.dd=datadist(english2)
 118 options(datadist='english2.dd')
 119 
 120 # THIS DOESN'T WORK.  NOT SURE WHY!
 121 # english2.ols <- ols(RTlexdec~AgeSubject+rcs(WrittenFrequency,3)+PC1+AgeSubject*WrittenFrequency,data=english2)
 122 
 123 
 124 # Baayen, exercise 8 (p. 262)
 125 
 126 imaging
 127 imaging.lm <- ols(FilteredSignal ~ BehavioralScore + Condition + BehavioralScore:Condition,data=imaging)
 128 imaging.lm
 129 
 130 # at the surface, it looks like there is a significant interaction of BehavioralScore by Condition, with a coefficient of 0.78
 131 
 132 # however ...
 133 
 134 plot(density(imaging.lm$residuals))
 135 
 136 # but this is no good!  the residuals are not normally distributed
 137 
 138 min(predict(imaging.lm,imaging)-imaging$FilteredSignal)
 139 max(predict(imaging.lm,imaging)-imaging$FilteredSignal)
 140 
 141 # this is the outlier score
 142 
 143 imaging[19,]
 144 
 145 # let's try the model again with the outliers removed
 146 
 147 imaging.2 <- subset(imaging,imaging$FilteredSignal>50)
 148 imaging.lm2 <- ols(FilteredSignal ~ BehavioralScore + Condition + BehavioralScore:Condition,data=imaging.2)
 149 imaging.lm2
 150 
 151 # ouch.  no interaction after all!

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2021-04-22 12:55:33, 4.1 KB) [[attachment:APS-hw2.R]]
  • [get | view] (2021-04-22 12:55:33, 491.2 KB) [[attachment:BaayenETAL06.pdf]]
  • [get | view] (2021-04-22 12:55:33, 3.0 KB) [[attachment:VanDurmeSession2.R]]
  • [get | view] (2021-04-22 12:55:33, 7.5 KB) [[attachment:lexdecRT.R]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.

MoinMoin Appliance - Powered by TurnKey Linux