STAT 5 Solutions to Homework 5, Spring 2004. [NOTE: many digits in the numerical output below were lost when this document was converted to text; the R code has been repaired, but printed values are shown as extracted and are incomplete.] 1. Use data set particle.txt. (a) > particle <- read.table("hw05.data.txt", header=TRUE) > full <- lm(y ~ temp + time + I(temp^2) + I(time^2) + temp*time, data=particle) > summary(full) Estimate Std. Error t value Pr(>|t|) (Intercept) 76.795 .0649 6.65 < e-6 *** temp -.99044 0.6754 -7.850 < e-6 *** time .88684 0.40767 9.54 .7e-0 *** I(temp^2) -0.650 0.05987 -0.86 .50e- *** I(time^2) -.57500 0.66 -4.5 0.00070 *** temp:time -0.556 0.076 -5.55 .8e-05 *** Residual standard error: .554 on 8 degrees of freedom Multiple R-Squared: 0.9548, Adjusted R-squared: 0.9467 F-statistic: 8. on 5 and 8 DF, p-value: < 2.2e-16 > library(MASS) > qqnorm(stdres(full)) [Normal Q-Q plot of the standardized residuals omitted: Sample Quantiles against Theoretical Quantiles.] (b) > reduced <- lm(y ~ temp + time, data=particle) > anova(reduced, full) Analysis of Variance Table Model 1: y ~ temp + time Model 2: y ~ temp + time + I(temp^2) + I(time^2) + temp * time Res.Df RSS Df Sum of Sq F Pr(>F) 1 65.90 2 8 5.66 00.4 60.705 .59e- *** We reject the null hypothesis (p-value < 0.0001). Therefore, quadratic curvature in the response (as a function of the x's) appears to be statistically detectable. (c) Setting the gradient of the fitted quadratic surface equal to zero and solving the resulting 2x2 linear system gives the stationary point, approximately (temp, time) = (-.050, .769); this is where we have a local maximum. [We should check that it is a maximum and that it is the absolute maximum.]
Using functions that are built into R. > max.x <- data.frame(-.050440, .76884) > names(max.x) <- c("temp","time") > predict(full, max.x, interval="confidence", level=0.90) fit lwr upr 1 84.7997 8.5944 86.995 > predict(full, max.x, interval="prediction", level=0.90) fit lwr upr 1 84.7997 78.605 9.74 Using matrix calculations. x0 <- c(1, max.x$temp, max.x$time, max.x$temp^2, max.x$time^2, max.x$temp*max.x$time) y0 <- sum(full$coef*x0) X <- cbind(rep(1, dim(particle)[1]), particle$temp, particle$time, particle$temp^2, particle$time^2, particle$temp*particle$time) MSE <- anova(full)$Mean[length(anova(full)$Mean)] cxxc <- x0 %*% ginv(t(X)%*%X) %*% x0 ll <- y0 - qt(0.95, full$df.residual)*sqrt(MSE*cxxc) # lower and upper confidence limits ul <- y0 + qt(0.95, full$df.residual)*sqrt(MSE*cxxc) ll <- y0 - qt(0.95, full$df.residual)*sqrt(MSE*(1+cxxc)) # lower and upper prediction limits ul <- y0 + qt(0.95, full$df.residual)*sqrt(MSE*(1+cxxc)) 2. There is weak evidence of lack of fit, p-value = 0.055. Using functions that are built into R. > cells <- lm(y ~ as.factor(temp)*as.factor(time), data=particle) > anova(cells, full) Analysis of Variance Table Model 1: y ~ as.factor(temp) * as.factor(time) Model 2: y ~ temp + time + I(temp^2) + I(time^2) + temp * time Res.Df RSS Df Sum of Sq F Pr(>F) 1 4 0.44 2 8 5.66 -4 -4. 5.479 0.05478 Using matrix calculations. > Y <- particle$y > F.ratio <- ((t(Y)%*%(project(Xstar)-project(X))%*%Y)/(10-6)) / ((t(Y)%*%(diag(rep(1,24))-project(Xstar))%*%Y)/(24-10)) > p.value <- 1-pf(F.ratio, 10-6, 24-10) 3. Use the scenario of Problem 4.7 of Rencher. (a) > d y protein meat 7 0 8 ... 46 97 47 06 (b) > attach(d) # The data frame "d" is searched by R when evaluating a variable > means <- tapply(y, list(meat, protein), mean) > means 96.50000 79.0000 85.90000 9.8 95.6667 8.00000
(c) > matplot(c(1,3), c(60,110), type="n", xlab="Meat", ylab="Mean Response", main="Weight Gain") > matlines(x.axis, means, type="b", cex=1) [Interaction plot "Weight Gain" omitted: mean response (60-110) against meat (1-3), one line per protein level.] (d) > options(contrasts=c("contr.sum","contr.sum")) > lm.out <- lm(y ~ protein*meat, data=d) > summary.aov(lm.out, ssType=1) protein 89. 89. 4.096 0.05 . meat .8 6.9 0.06 0.9670 protein:meat 95.7 597.9 .9057 0.06605 . Residuals 4 846. 05.8 > lm.out <- lm(y ~ meat*protein, data=d) > summary.aov(lm.out, ssType=1) meat 6.4 8. 0.099 0.96097 protein 86.5 86.5 4.070 0.0568 . meat:protein 95.7 597.9 .9057 0.06605 . Residuals 4 846. 05.8 (e) Using matrix calculations. i. Using the sum restrictions. Numerical results are shown in part (f). Xeffects <- model.matrix(lm.out) # Type I ssa <- t(d$y)%*%(project(Xeffects[,1:2])-project(Xeffects[,1]))%*%d$y # R(a|mu) ssb <- t(d$y)%*%(project(Xeffects[,1:4])-project(Xeffects[,1:2]))%*%d$y ssab <- t(d$y)%*%(project(Xeffects)-project(Xeffects[,1:4]))%*%d$y # Type II ssa <- t(d$y)%*%(project(Xeffects[,1:4])-project(Xeffects[,c(1,3,4)]))%*%d$y ssb <- t(d$y)%*%(project(Xeffects[,1:4])-project(Xeffects[,c(1,2)]))%*%d$y ssab <- t(d$y)%*%(project(Xeffects)-project(Xeffects[,1:4]))%*%d$y Xcells <- matrix(0,47,6) Xcells[1:8,] <- matrix(rep(c(1,0,0,0,0,0),8),8,6,byrow=TRUE) Xcells[9:18,] <- matrix(rep(c(0,1,0,0,0,0),10),10,6,byrow=TRUE) Xcells[19:24,] <- matrix(rep(c(0,0,1,0,0,0),6),6,6,byrow=TRUE) Xcells[25:34,] <- matrix(rep(c(0,0,0,1,0,0),10),10,6,byrow=TRUE) Xcells[35:40,] <- matrix(rep(c(0,0,0,0,1,0),6),6,6,byrow=TRUE) Xcells[41:47,] <- matrix(rep(c(0,0,0,0,0,1),7),7,6,byrow=TRUE)
# Type III C <- matrix(c(1/3,1/3,1/3,-1/3,-1/3,-1/3),1,6,byrow=TRUE) # For SSprotein C <- matrix(c(rep(c(1/2,-1/2,0),2),rep(c(1/2,0,-1/2),2)),2,6,byrow=TRUE) # For SSmeat CXXC <- C%*%ginv(t(Xcells)%*%Xcells)%*%t(C) Cb <- C%*%ginv(t(Xcells)%*%Xcells)%*%t(Xcells)%*%d$y SSH0 <- t(Cb)%*%solve(CXXC)%*%Cb # Gives the value of SS ii. Using the SAS baseline restriction we obtain the same results as with the sum restriction. Xsas <- matrix(NA,47,6) Xsas[,1] <- rep(1,47) Xsas[,2] <- c(rep(1,24),rep(0,23)) Xsas[,3] <- c(rep(1,8),rep(0,16),rep(1,10),rep(0,13)) Xsas[,4] <- c(rep(0,8),rep(1,10),rep(0,16),rep(1,6),rep(0,7)) Xsas[,5] <- Xsas[,2]*Xsas[,3] Xsas[,6] <- Xsas[,2]*Xsas[,4] # Type I ssa <- t(d$y)%*%(project(Xsas[,1:2])-project(Xsas[,1]))%*%d$y # R(a|mu) ssb <- t(d$y)%*%(project(Xsas[,1:4])-project(Xsas[,1:2]))%*%d$y ssab <- t(d$y)%*%(project(Xsas)-project(Xsas[,1:4]))%*%d$y # Type II ssa <- t(d$y)%*%(project(Xsas[,1:4])-project(Xsas[,c(1,3,4)]))%*%d$y ssb <- t(d$y)%*%(project(Xsas[,1:4])-project(Xsas[,c(1,2)]))%*%d$y ssab <- t(d$y)%*%(project(Xsas)-project(Xsas[,1:4]))%*%d$y (f) Using function anova() for Type I and function Anova() from package car for Types II and III. [NOTE: digits in the tables below were lost in extraction.] Anova Table (Type I tests) protein 89. 89. 4.096 0.05 . meat .8 6.9 0.06 0.9670 protein:meat 95.7 597.9 .9057 0.06605 . Residuals 4 846. 05.8 Anova Table (Type II tests) protein 86.5 4.070 0.0568 . meat .8 0.06 0.9670 protein:meat 95.7 .9057 0.06605 . Residuals 846. 4 Anova Table (Type III tests) (Intercept) 598 707.806 < 2e-16 *** protein 75.650 0.0604 . meat 9 0.0 0.9780 protein:meat 96 .9057 0.06605 . Residuals 846 4 4. Use fake two-way factorial data. (a) Repeat parts (a)-(e) of Problem 3 on these data. i. > fake Y A B 4 4
0 4 5 9 6 7 8 6 9 7 0 0 7 [NOTE: the data listing above lost digits in extraction.] ii. > means <- tapply(fake$Y, list(fake$A, fake$B), mean) > means 4.0 .0 9.5 6.5 0.0 7.0 iii. > x.axis <- unique(fake$A) > matplot(c(1,3), c(5,15), type="n", xlab="Factor A", ylab="Mean Response") > matlines(x.axis, means, type="b") [Interaction plot omitted: mean response (6-14) against Factor A (1-3), one line per level of B.] iv. > fake$A <- as.factor(fake$A) > fake$B <- as.factor(fake$B) > options(contrasts=c("contr.sum","contr.sum")) > lm.out <- lm(Y ~ A*B, data=fake) > summary.aov(lm.out, ssType=1) A .50 .5 .50 0.04095 * B 7.09 8.50 8.595 0.005 * A:B 4.68 0.657 0.6569 0.668 Residuals .000 .000 > lm.out <- lm(Y ~ B*A, data=fake) > summary.aov(lm.out, ssType=1) B 9.0500 4.550 4.550 0.0864 * A 0.9 5.95 5.95 0.07 * B:A 4.676 0.6569 0.6569 0.668 Residuals .0000 .0000 v. Using matrix calculations. Using the sum restrictions. Numerical results are shown in part (vi). Xeffects <- model.matrix(lm.out) Y <- fake$Y [page 5]
# Type I ssa <- t(Y)%*%(project(Xeffects[,1:3])-project(Xeffects[,1]))%*%Y # R(a|mu) ssb <- t(Y)%*%(project(Xeffects[,1:5])-project(Xeffects[,1:3]))%*%Y ssab <- t(Y)%*%(project(Xeffects)-project(Xeffects[,1:5]))%*%Y # Type II ssa <- t(Y)%*%(project(Xeffects[,1:5])-project(Xeffects[,c(1,4,5)]))%*%Y ssb <- t(Y)%*%(project(Xeffects[,1:5])-project(Xeffects[,c(1,2,3)]))%*%Y ssab <- t(Y)%*%(project(Xeffects)-project(Xeffects[,1:5]))%*%Y > Xcells [cell-means design matrix, one 0/1 indicator column per cell; the printed 1 entries were lost in extraction:] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 # Type III # For SSA C <- matrix(c(1/3,1/3,1/3,0,0,0,-1/3,-1/3,-1/3,0,0,0,1/3,1/3,1/3,-1/3,-1/3,-1/3),2,9,byrow=TRUE) # For SSB C <- matrix(c(rep(c(1/3,0,-1/3),3),rep(c(0,1/3,-1/3),3)),2,9,byrow=TRUE) CXXC <- C%*%ginv(t(Xcells)%*%Xcells)%*%t(C) Cb <- C%*%ginv(t(Xcells)%*%Xcells)%*%t(Xcells)%*%Y SSH0 <- t(Cb)%*%solve(CXXC)%*%Cb # Gives the value of SS Using the SAS baseline restriction we obtain the same results as with the sum restriction. > Xsas [baseline-coded design matrix; the printed 1 entries were lost in extraction:] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 5 0 0 0 0 0 6 0 0 0 0 0 7 0 0 0 0 0 8 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 # Type I ssa <- t(Y)%*%(project(Xsas[,1:3])-project(Xsas[,1]))%*%Y # R(a|mu) ssb <- t(Y)%*%(project(Xsas[,1:5])-project(Xsas[,1:3]))%*%Y ssab <- t(Y)%*%(project(Xsas)-project(Xsas[,1:5]))%*%Y [page 6]
# Type II ssa <- t(Y)%*%(project(Xsas[,1:5])-project(Xsas[,c(1,4,5)]))%*%Y ssb <- t(Y)%*%(project(Xsas[,1:5])-project(Xsas[,c(1,2,3)]))%*%Y ssab <- t(Y)%*%(project(Xsas)-project(Xsas[,1:5]))%*%Y vi. Using function anova() for Type I and function Anova() from package car for Types II and III. > anova(lm.out) Anova Table (Type I tests) A .50 .5 .50 0.04095 * B 7.09 8.50 8.595 0.005 * A:B 4.68 0.657 0.6569 0.668 Residuals .000 .000 Anova Table (Type II tests) A 0.9 5.95 0.07 * B 7.09 8.595 0.005 * A:B .68 4 0.6569 0.668 Residuals .000 Anova Table (Type III tests) (Intercept) 8.5 8.5 5.798e-05 *** A 5. .554 0.0487 * B . 6.655 0.0474 * A:B .6 4 0.6569 0.668 Residuals .00 (b) Xincomp.full <- Xcells[-c(6,7),-5] Xincomp.red <- Xeffects[-c(6,7),1:5] Y.incomp <- Y[-c(6,7)] full <- lm(Y.incomp ~ Xincomp.full - 1) reduced <- lm(Y.incomp ~ Xincomp.red - 1) anova(reduced, full) Analysis of Variance Table Model 1: Y.incomp ~ Xincomp.red - 1 Model 2: Y.incomp ~ Xincomp.full - 1 Res.Df RSS Df Sum of Sq F Pr(>F) 1 5.898 2 .5000 .98 0.546 0.7954 F-ratio = 0.5 with p-value = 0.7954. There is not enough evidence that there is interaction between factors A and B. (c) X <- Xeffects[-c(6,7),1:5] C <- c(1,0,1,0,1) mu <- C%*%ginv(t(X)%*%X)%*%t(X)%*%Y.incomp > mu [,1] [1,] 0.085