attachment:myFunctions.R of Feb2011Regression

   1 ################################################################
   2 # se: calculates standard error
   3 ################################################################
   4 
   5 se <- function(x)
   6       {
   7         y <- x[!is.na(x)] # remove the missing values, if any
   8         sqrt(var(as.vector(y))/length(y))
   9 }
  10 
  11 ################################################################
  12 # myCenter: centers data.frames or vectors
  13 ################################################################
  14 
  15 myCenter <- function(x) {
  16 	if (is.numeric(x)) { return(x - mean(x)) }
  17 	if (is.factor(x)) {
  18 		x <- as.numeric(x)
  19 		return(x - mean(x))
  20 	}
  21 	if (is.data.frame(x) || is.matrix(x)) {
  22 		m <- matrix(nrow=nrow(x), ncol=ncol(x))
  23 		colnames(m) <- paste("c", colnames(x), sep="")
  24 		for (i in 1:ncol(x)) {
  25 			if (is.factor(x[,i])) {
  26 				y <- as.numeric(x[,i])
  27 				m[,i] <- y - mean(y, na.rm=T)
  28                         }
  29 			if (is.numeric(x[,i])) {
  30 				m[,i] <- x[,i] - mean(x[,i], na.rm=T)
  31                 }
  32           }
  33         return(as.data.frame(m))
  34     }
  35 }
  36 
  37 
  38 ################################################################
  39 # beehive: hexbin plots with regression line of logit models
  40 ################################################################
  41 
  42 beehive <- function(
  43 # version 0.41
  44 # written by tiflo@csli.stanford.edu
  45 # code contributions from Austin Frank, Ting Qian, and Harald Baayen
  46 # remaining errors are mine (tiflo@csli.stanford.edu)
  47 #
  48 # last modified 03/14/10
  49 #
  50 # now also supports linear models
  51 # backtransforms centering and standardization
  52 #
  53 # known problems:
  54 #   too simple treatment of random effects
  55 #   the dotted lines are not really CIs yet.
  56 	model,
  57 	name.predictor,
  58 	name.outcome= "outcome",
  59 	predictor= NULL,
  60 
  61 	# is the predictor centered IN THE MODEL?
  62 	# is the predictor transformed before
  63 	# (centered and) ENTERED INTO THE MODEL?
  64 	predictor.centered= if(!is.null(predictor)) { T } else { F },
  65 	predictor.standardized= F,
  66 	predictor.transform= NULL,
  67 	fun= NULL,
  68 	type= "hex",
  69 	main= NA,
  70 	xlab= NA,
  71 	ylab= NA,
  72 	xlim= NA,
  73 	ylim= NA,
  74 	legend.position="right",
  75 	fontsize=16,
  76 #	col.line= "#333333",
  77 	col.line= "red2",
  78 	col.ci= col.line,
  79 	lwd.line= 1.2,
  80 	lty.line= "solid",
  81 	alpha.ci= 3/10,
  82 	hex.mincnt= 1,
  83 	hex.limits = c(round(log(hex.mincnt, base=10)), round(log(nrow(model@frame) / 2, base=10))),
  84 	hex.midpoint = (max(hex.limits) - min(hex.limits)) / 2,
  85 	hex.nbreaks = 5,
  86 	hex.breaks = round(seq(min(hex.limits), max(hex.limits), length.out=hex.nbreaks)),
  87 	hex.trans = "log10",
  88 	...
  89 )
  90 {
  91     	if (!is(model, "mer")) {
  92      		stop("argument should be a mer model object")
  93     	}
  94     	if ((length(grep("^glmer", as.character(model@call))) == 1) &
  95           (length(grep("binomial", as.character(model@call))) == 1)) {
  96 		model.type = "binomial"
  97 	} else {
  98 		if (length(grep("^lmer", as.character(model@call))) == 1) {
  99 			model.type = "gaussian"
 100 		}
 101 	}
 102 	if (!(model.type %in% c("binomial","gaussian"))) {
 103 		stop("argument should be a glmer binomial or gaussian model object")
 104 	}
 105     	if (!is.na(name.outcome)) {
 106      	   	if (!is.character(name.outcome))
 107             	stop("name.outcome should be a string\n")
 108     	}
 109 	if (!is.na(xlab[1])) {
 110         	if (!is.character(xlab))
 111             	stop("xlab should be a string\n")
 112     	}
 113     	if (!is.na(ylab)) {
 114      	   	if (!is.character(ylab))
 115             	stop("ylab should be a string\n")
 116     	}
 117 	# load libaries
 118 	require(lme4)
 119 	require(Design)
 120 	require(ggplot2)
 121 
 122 	if (predictor.standardized) { predictor.centered = T }
 123     	if (is.null(fun)) {
 124 		if (is.na(ylab)) {
 125 		 	if (model.type == "binomial") { ylab= paste("Predicted log-odds of", name.outcome) }
 126 			if (model.type == "gaussian") { ylab= paste("Predicted ", name.outcome) }
 127 		}
 128 		fun= I
 129 	} else {
 130 		if (is.na(ylab)) {
 131 		 	if (model.type == "binomial") { ylab= paste("Predicted probability of", name.outcome) }
 132 			if (model.type == "gaussian") { ylab= paste("Predicted ", name.outcome) }
 133 		}
 134 		fun= match.fun(fun)
 135 	}
 136     	if (!is.null(predictor.transform)) {
 137 		predictor.transform= match.fun(predictor.transform)
 138     	} else { predictor.transform= I }
 139 
 140 	indexOfPredictor= which(names(model@fixef) == name.predictor)
 141 
 142 	# get predictor
 143 	if (is.null(predictor)) {
 144 		# simply use values from model matrix X
 145  		predictor= model@X[,indexOfPredictor]
 146 
 147 		# function for predictor transform
 148 		fun.predictor= I
 149 
 150 		if (is.na(xlab)) { xlab= name.predictor }
 151     	}     	
 152 else {
 153 		# function for predictor transform
 154 		trans.pred = predictor.transform(predictor)
 155 		m= mean(trans.pred, na.rm=T)
 156 		rms = sqrt(var(trans.pred, na.rm=T) /(sum(ifelse(is.na(trans.pred),0,1)) - 1))
 157 		fun.predictor <- function(x) 
 158 		{
 159 			x= predictor.transform(x) 			
 160 			if (predictor.centered == T) { x= x - m } 			
 161 			if (predictor.standardized == T) { x= x / rms }  					
 162 			return(x) 					 
 163 		}	 	
 164 		if ((is.na(xlab)) & (label(predictor) != "")) { xlab = label(predictor) }    	
 165 	}
 166 	
 167 # get outcome for binomial or gaussian model 	
 168 if (model.type == "binomial") { 		
 169 	outcome= fun(qlogis(fitted(model))) 	
 170 	} 
 171 else { 		
 172 	outcome= fun(fitted(model)) 	
 173 	}      
 174 	## calculate grand average but exclude effect to be modeled      
 175 	## (otherwise it will be added in twice!)      
 176 	## random effects are all included, even those for predictor (if any).      
 177 	## should random slope terms for the predictor be excluded?      
 178 	## prediction from fixed effects
 179 
 180 
 181 	if (ncol(model@X) > 2) {			
 182 	Xbeta.hat = model@X[, -indexOfPredictor] %*% model@fixef[-indexOfPredictor]
 183 	} else {
 184 		Xbeta.hat = model@X[, -indexOfPredictor] %*% t(model@fixef[-indexOfPredictor])
 185 	}
 186 
 187      ## adjustment from random effects
 188      Zb = crossprod(model@Zt, model@ranef)@x
 189 
 190      ## predicted value using fixed and random effects
 191      Y.hat = Xbeta.hat + Zb
 192 
 193      ## intercept is grand mean of predicted values
 194      ## (excluding fixed effect of predictor)
 195      ## (including random effects of predictor, if any)
 196      int = mean(Y.hat)
 197 
 198 	# slope
 199 	slope <- fixef(model)[name.predictor]
 200 
 201 	## error and confidence intervals
 202 	stderr <- sqrt(diag(vcov(model)))
 203 	names(stderr) <- names(fixef(model))
 204 	slope.se <- stderr[name.predictor]
 205 	lower <- -1.96 * slope.se
 206 	upper <- 1.96 * slope.se
 207 	
 208 	# setting graphical parameters
 209 	if (is.na(ylim)) { ylim= c(min(outcome) - 0.05 * (max(outcome) - min(outcome)), max(outcome) + 0.05 * (max(outcome) - min(outcome)) ) }
 210    	if (is.na(xlim)) { xlim= c(min(predictor) - 0.05 * (max(predictor) - min(predictor)), max(predictor) + - 0.05 * (max(predictor) - min(predictor))) }
 211 
 212 	print("Printing with ...")
 213 	print(paste("   int=", int))
 214 	print(paste("   slope=", slope))
 215 	print(paste("   centered=", predictor.centered))
 216 	print("   fun:")
 217 	print(fun.predictor)
 218 
 219 	pdata= data.frame( 	predictor=predictor, outcome=outcome	)
 220 
 221 	x= seq(xlim[1], xlim[2], length=1000)
 222 	fit= int + slope * fun.predictor(x)
 223 	ldata= data.frame(
 224 				predictor= x,
 225 				outcome= fun(fit),
 226 				transformed.lower= fun(fit + lower),
 227 				transformed.upper= fun(fit + upper)
 228 	)
 229 	theme_set(theme_grey(base_size=fontsize))
 230 	theme_update(axis.title.y=theme_text(angle=90, face="bold", size=fontsize, hjust=.5, vjust=.5))
 231 	theme_update(axis.title.x=theme_text(angle=0, face="bold", size=fontsize, hjust=.5, vjust=.5))
 232 	p <- ggplot(data=pdata, aes(x=predictor, y=outcome)) +
 233 		xlab(xlab) +
 234 		ylab(ylab) +
 235 		xlim(xlim) +
 236 		ylim(ylim) +
 237 		opts(legend.position=legend.position, aspect.ratio=1)
 238 	
 239 
 240 	    # 	for degbugging:
 241 		# panel.lines(rep(mean(x),2), c(min(y),max(y)))
 242 		# panel.lines(c(min(x),max(x)), c(mean(y),mean(y)))
 243 
 244 	if (type == "points") {
 245 		p <- p + geom_point(alpha=3/10)
 246 	} else if (type == "hex") {
 247 #		p <- p + geom_hex(bins = 30) +
 248 #			scale_fill_gradient2(low= "lightyellow",
 249 #							mid="orange",
 250 #							high=muted("red"),
 251 #							midpoint= hex.midpoint,
 252 #							space="rgb",
 253 #							name= "Count",
 254 #							limits= hex.limits,
 255 #							breaks= hex.breaks,
 256 #							trans = hex.trans
 257 #							)
 258 #		lo <- muted(rgb(230,255,230,max=255))
 259 #		mi <- rgb(150,185,150,max=255)
 260 #		hi <- rgb(95,130,95,max=255)
 261 
 262 		lo <- "#D3DFD3"
 263 		hi <- "#006600"
 264 		mi <- "#88AF88"
 265 		p <- p + geom_hex(bins = 30) +
 266 			scale_fill_gradient2(low = lo,
 267 							mid=mi,
 268 							high=hi,
 269 							space="rgb",
 270 							name="Count")
 271 	}
 272 	p + 	geom_ribbon(data=ldata,
 273 			aes(	x= predictor,
 274 				ymin=transformed.lower,
 275 				ymax=transformed.upper
 276 			),
 277 			fill= col.ci,
 278 			alpha= alpha.ci
 279 		) +
 280 		geom_line(data=ldata,
 281 			aes(x= predictor,
 282 				y=outcome
 283 			),
 284 			colour= col.line,
 285 			size= lwd.line,
 286 			linetype= lty.line,
 287 			alpha=1
 288 		)
 289 	
 290 	
 291 }
Attachment 'myFunctions.R'

Attached Files