cps = read.csv("http://www.macalester.edu/~kaplan/ism/datasets/cps.csv") attach(cps) library(MASS) # For ginv function #### 2-way ANOVA Example #### # Need to source rref.R source("/Users/shancock/Documents/Clark-Teaching/MATH219/2012-Spring/Handouts-Week7/rref.R") # Look at some plots plot(wage~sex) plot(wage~married) interaction.plot(sex,married,wage) # Plots means of each sex*married group tapply(wage,list(sex,married),mean) # Means of each of the four categories # Try plots w/o outlier max(wage) cps[wage==44.5,] cps2 = cps[-249,] interaction.plot(cps2$sex,cps2$married,cps2$wage) tapply(cps2$wage,list(cps2$sex,cps2$married),mean) # Response vector y = matrix(wage) # Sample size n = dim(cps)[1] # Create design matrix x0 = matrix(rep(1,n)) x1 = matrix(as.numeric(sex=="M")) # Male effect x2 = matrix(as.numeric(sex=="F")) # Female effect x3 = matrix(as.numeric(married=="Single")) x4 = matrix(as.numeric(married=="Married")) x5 = matrix(as.numeric(sex=="M"&married=="Single")) x6 = matrix(as.numeric(sex=="M"&married=="Married")) x7 = matrix(as.numeric(sex=="F"&married=="Single")) x8 = matrix(as.numeric(sex=="F"&married=="Married")) X = cbind(x0,x1,x2,x3,x4,x5,x6,x7,x8) # Fitted coefficients (not estimable) XtX = t(X)%*%X G = ginv(XtX) beta = G%*%t(X)%*%y # Check solution XtX%*%beta t(X)%*%y # Find different gen. inverse Z = matrix(rnorm(81),nrow=9,ncol=9) G2 = G + Z - G%*%XtX%*%Z%*%XtX%*%G # Check generalized inverse XtX XtX%*%G2%*%XtX # Find different fitted coefficients beta2 = G2%*%t(X)%*%y # Check solution XtX%*%beta2 t(X)%*%y # Fitted values (estimable) P = X%*%G%*%t(X) yhat = P%*%y # Could also use X%*%beta # Try with different generalized inverse P2 = X%*%G2%*%t(X) yhat2 = P2%*%y # Compare head(yhat) head(yhat2) ### March 2 # Basis set of estimable functions (take non-zero cols) t(rref(XtX)) C = t(rref(XtX))[,c(1,2,4,6)] C # Estimates of basis set of estimable functions contrast.est = t(C)%*%beta contrast.est # Where do the estimates come from? tapply(wage,list(sex,married),mean) # Estimated variance of each estimate (on diagonal) mse = (t(y-X%*%beta)%*%(y-X%*%beta))/(534-4) as.vector(mse)*t(C)%*%G%*%C # Approximate 95% CI for the fourth estimable function in basis -3.097+c(-1,1)*qt(.025,530)*sqrt(.823)