Two-Way ANOVA

Data rats is from the faraway package, which has 48 observations on the following 3 variables.

time: survival time in tens of hours
poison: a factor with levels I II III
treat: a factor with levels A B C D

library(faraway)

?rats
summary(rats)
rats

Graphical Display

par(mfrow=c(2,2))

boxplot(time~treat, data=rats, outline=FALSE)
stripchart(time~treat, data=rats, method="jitter",
           col="blue", vertical=TRUE, add=TRUE)

boxplot(time~poison, data=rats, outline=FALSE)
stripchart(time~poison, data=rats, method="jitter",
           col="blue", vertical=TRUE, add=TRUE)

interaction.plot(rats$treat, rats$poison, rats$time)
interaction.plot(rats$poison, rats$treat, rats$time)

Try ggplot2. Save the 4 plots and then use the multiplot function from here.

library(ggplot2)

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

p1 = ggplot(aes(x=treat, y=time), data=rats) + 
  geom_boxplot() +
  theme(axis.text.x = element_text(angle=-45)) + 
  ylab("Survival Time")

p2 = ggplot(aes(x=poison, y=time), data=rats) + 
  geom_boxplot() +
  theme(axis.text.x = element_text(angle=-45)) + 
  ylab("Survival Time")

p3 = ggplot(rats, aes(x=poison, y=time)) + 
  geom_point() + 
  stat_summary(fun.y="mean", geom="line", aes(group=treat, linetype=treat))+
  theme(legend.position = "top", legend.direction = "horizontal")

p4 = ggplot(rats, aes(x=treat, y=time)) + 
  geom_point() + 
  stat_summary(fun.y="mean", geom="line", aes(group=poison, linetype=poison))+
  theme(legend.position = "top", legend.direction = "horizontal")

multiplot(p1, p2, p3, p4, cols=2)

Two-way ANOVA Model with F-tests

g=lm(time ~ poison*treat, rats)
anova(g)

## Analysis of Variance Table
## 
## Response: time
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## poison        2 1.03301 0.51651 23.2217 3.331e-07 ***
## treat         3 0.92121 0.30707 13.8056 3.777e-06 ***
## poison:treat  6 0.25014 0.04169  1.8743    0.1123    
## Residuals    36 0.80073 0.02224                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostics

par(mfrow=c(1,2))
qqnorm(g$res)
plot(g$fitted, g$res, xlab="FItted", ylab="Residuals")

Notice the trumpet pattern for the residuals, so try Box-cox transformation.

library(MASS)
boxcox(g)

Re-Run Two-way ANOVA Analysis

Try the reciprocal transformation

g=lm(1/time ~ poison*treat, rats)
anova(g)

## Analysis of Variance Table
## 
## Response: 1/time
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## poison        2 34.877 17.4386 72.6347 2.310e-13 ***
## treat         3 20.414  6.8048 28.3431 1.376e-09 ***
## poison:treat  6  1.571  0.2618  1.0904    0.3867    
## Residuals    36  8.643  0.2401                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

par(mfrow=c(1,2))
qqnorm(g$res)
plot(g$fitted, g$res, xlab="FItted", ylab="Residuals")

Pairwise Comparisons

Pairwise CIs for treat.

TukeyHSD(aov(1/time ~ poison + treat, data=rats), "treat")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = 1/time ~ poison + treat, data = rats)
## 
## $treat
##           diff        lwr         upr     p adj
## B-A -1.6574024 -2.1959343 -1.11887050 0.0000000
## C-A -0.5721354 -1.1106673 -0.03360355 0.0335202
## D-A -1.3583383 -1.8968702 -0.81980640 0.0000002
## C-B  1.0852669  0.5467351  1.62379883 0.0000172
## D-B  0.2990641 -0.2394678  0.83759598 0.4550931
## D-C -0.7862029 -1.3247347 -0.24767096 0.0018399

myCIs = TukeyHSD(aov(1/time ~ poison + treat, data=rats), "treat")
plot(myCIs)

Check how Tukey’s intervals are computed.

treat.CI = TukeyHSD(aov(1/time ~ poison + treat, data=rats), "treat")$treat
treat.CI[,1]-treat.CI[,2]  # half-width of the CIs

##       B-A       C-A       D-A       C-B       D-B       D-C 
## 0.5385319 0.5385319 0.5385319 0.5385319 0.5385319 0.5385319

qtukey(0.95,4,42)*summary(g)$sigma*sqrt(2/(48/4))/sqrt(2)

## [1] 0.5350869

Do the same for the pairwise CIs for poison

poison.CI=TukeyHSD(aov(1/time ~ poison + treat, data=rats), "poison")$poison
poison.CI[,1]-poison.CI[,2]  # half-width of the CIs
qtukey(0.95,3,42)*summary(g)$sigma*sqrt(2/(48/3))/sqrt(2)

Unbalanced Design

We remove the 1st obesrvation from the data rats to create a two-factor example with unblanced design.

newrats=rats;
newrats=newrats[-1,]

Although the ANOVA output changes with the order of the two factors, the information stays the same: the interaction is not significant; the two factors are significant no matter whether the other factor is included or not included in the model. So the data support the additive model.

anova(lm(1/time ~ poison*treat, newrats))

## Analysis of Variance Table
## 
## Response: 1/time
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## poison        2 36.672 18.3358 81.0799 7.276e-14 ***
## treat         3 18.567  6.1889 27.3670 2.706e-09 ***
## poison:treat  6  1.980  0.3300  1.4592    0.2207    
## Residuals    35  7.915  0.2261                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(1/time ~ treat*poison, newrats))

## Analysis of Variance Table
## 
## Response: 1/time
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## treat         3 20.136  6.7121 29.6807 9.986e-10 ***
## poison        2 35.102 17.5510 77.6094 1.362e-13 ***
## treat:poison  6  1.980  0.3300  1.4592    0.2207    
## Residuals    35  7.915  0.2261                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Another Unbalanced Example

The experiment had litters of rats born to one mother but raised by another. The row factor is the genotype of the litter and the column factor is the genotype of the foster mother. There are n=61 litters, and the y’s are the average weight of the litters in grams at 28 days. The design is unbalanced.

Weight=c(61.5, 68.2, 64,65,59.7, 55, 42, 60.2, 52.5, 61.8,
49.5, 52.7, 42, 54, 61, 48.2, 39.6, 60.3, 51.7, 49.3, 48, 50.8, 64.7, 61.7, 64,
62, 56.5, 59,47.2, 53, 51.3, 40.5, 37, 36.3, 68, 56.3, 69.8, 67, 39.7, 46, 61.3,
55.3, 55.7, 50, 43.8, 54.5, 59, 57.4, 54, 47, 59.5, 52.8, 56, 45.2, 57, 61.4,
44.8, 51.5, 53, 42, 54);

n=matrix(c(5,3,4,5,4,5,4,2,3,3,5,3,4,3,3,5),4,4); 
n=t(n);
n

##      [,1] [,2] [,3] [,4]
## [1,]    5    3    4    5
## [2,]    4    5    4    2
## [3,]    3    3    5    3
## [4,]    4    3    3    5

Foster=c(); Litter=c();
for(i in 1:4){
  for (j in 1:4){
    Foster=c(Foster, rep(j, n[i,j]));
    Litter=c(Litter, rep(i, n[i,j]));
  }
}

rats2 = data.frame(Weight = Weight, 
                   Litter=as.factor(Litter),
                   Foster=as.factor(Foster))

ggplot(rats2, aes(x=Litter, y=Weight)) + 
  geom_point() + 
  stat_summary(fun.y="mean", geom="line", aes(group=Foster, linetype=Foster))+
  theme(legend.position = "top", legend.direction = "horizontal")

ggplot(rats2, aes(x=Foster, y=Weight)) + 
  geom_point() + 
  stat_summary(fun.y="mean", geom="line", aes(group=Litter, linetype=Litter))+
  theme(legend.position = "top", legend.direction = "horizontal")

Although the ANOVA output changes with the order of the two factors, the information stays the same: only the Foster factor is significant, so pick the one-way ANOVA model with the Foster effect.

anova(lm(Weight ~ Litter * Foster, rats2))

## Analysis of Variance Table
## 
## Response: Weight
##               Df  Sum Sq Mean Sq F value   Pr(>F)   
## Litter         3   60.16  20.052  0.3697 0.775221   
## Foster         3  775.08 258.360  4.7632 0.005736 **
## Litter:Foster  9  824.07  91.564  1.6881 0.120053   
## Residuals     45 2440.82  54.240                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(lm(Weight ~ Foster * Litter, rats2))

## Analysis of Variance Table
## 
## Response: Weight
##               Df  Sum Sq Mean Sq F value   Pr(>F)   
## Foster         3  771.61 257.202  4.7419 0.005869 **
## Litter         3   63.63  21.211  0.3911 0.760004   
## Foster:Litter  9  824.07  91.564  1.6881 0.120053   
## Residuals     45 2440.82  54.240                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1