Data rats
is from the faraway
package, which has 48 observations on the following 3 variables.
library(faraway)
?rats
summary(rats)
rats
par(mfrow=c(2,2))
boxplot(time~treat, data=rats, outline=FALSE)
stripchart(time~treat, data=rats, method="jitter",
col="blue", vertical=TRUE, add=TRUE)
boxplot(time~poison, data=rats, outline=FALSE)
stripchart(time~poison, data=rats, method="jitter",
col="blue", vertical=TRUE, add=TRUE)
interaction.plot(rats$treat, rats$poison, rats$time)
interaction.plot(rats$poison, rats$treat, rats$time)
Try ggplot2
. Save the 4 plots and then use the multiplot
function from here.
library(ggplot2)
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
p1 = ggplot(aes(x=treat, y=time), data=rats) +
geom_boxplot() +
theme(axis.text.x = element_text(angle=-45)) +
ylab("Survival Time")
p2 = ggplot(aes(x=poison, y=time), data=rats) +
geom_boxplot() +
theme(axis.text.x = element_text(angle=-45)) +
ylab("Survival Time")
p3 = ggplot(rats, aes(x=poison, y=time)) +
geom_point() +
stat_summary(fun.y="mean", geom="line", aes(group=treat, linetype=treat))+
theme(legend.position = "top", legend.direction = "horizontal")
p4 = ggplot(rats, aes(x=treat, y=time)) +
geom_point() +
stat_summary(fun.y="mean", geom="line", aes(group=poison, linetype=poison))+
theme(legend.position = "top", legend.direction = "horizontal")
multiplot(p1, p2, p3, p4, cols=2)
g=lm(time ~ poison*treat, rats)
anova(g)
## Analysis of Variance Table
##
## Response: time
## Df Sum Sq Mean Sq F value Pr(>F)
## poison 2 1.03301 0.51651 23.2217 3.331e-07 ***
## treat 3 0.92121 0.30707 13.8056 3.777e-06 ***
## poison:treat 6 0.25014 0.04169 1.8743 0.1123
## Residuals 36 0.80073 0.02224
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(1,2))
qqnorm(g$res)
plot(g$fitted, g$res, xlab="FItted", ylab="Residuals")
Notice the trumpet pattern for the residuals, so try Box-cox transformation.
library(MASS)
boxcox(g)
Try the reciprocal transformation
g=lm(1/time ~ poison*treat, rats)
anova(g)
## Analysis of Variance Table
##
## Response: 1/time
## Df Sum Sq Mean Sq F value Pr(>F)
## poison 2 34.877 17.4386 72.6347 2.310e-13 ***
## treat 3 20.414 6.8048 28.3431 1.376e-09 ***
## poison:treat 6 1.571 0.2618 1.0904 0.3867
## Residuals 36 8.643 0.2401
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(1,2))
qqnorm(g$res)
plot(g$fitted, g$res, xlab="FItted", ylab="Residuals")
Pairwise CIs for treat
.
TukeyHSD(aov(1/time ~ poison + treat, data=rats), "treat")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = 1/time ~ poison + treat, data = rats)
##
## $treat
## diff lwr upr p adj
## B-A -1.6574024 -2.1959343 -1.11887050 0.0000000
## C-A -0.5721354 -1.1106673 -0.03360355 0.0335202
## D-A -1.3583383 -1.8968702 -0.81980640 0.0000002
## C-B 1.0852669 0.5467351 1.62379883 0.0000172
## D-B 0.2990641 -0.2394678 0.83759598 0.4550931
## D-C -0.7862029 -1.3247347 -0.24767096 0.0018399
myCIs = TukeyHSD(aov(1/time ~ poison + treat, data=rats), "treat")
plot(myCIs)
Check how Tukey’s intervals are computed.
treat.CI = TukeyHSD(aov(1/time ~ poison + treat, data=rats), "treat")$treat
treat.CI[,1]-treat.CI[,2] # half-width of the CIs
## B-A C-A D-A C-B D-B D-C
## 0.5385319 0.5385319 0.5385319 0.5385319 0.5385319 0.5385319
qtukey(0.95,4,42)*summary(g)$sigma*sqrt(2/(48/4))/sqrt(2)
## [1] 0.5350869
Do the same for the pairwise CIs for poison
poison.CI=TukeyHSD(aov(1/time ~ poison + treat, data=rats), "poison")$poison
poison.CI[,1]-poison.CI[,2] # half-width of the CIs
qtukey(0.95,3,42)*summary(g)$sigma*sqrt(2/(48/3))/sqrt(2)
We remove the 1st obesrvation from the data rats
to create a two-factor example with unblanced design.
newrats=rats;
newrats=newrats[-1,]
Although the ANOVA output changes with the order of the two factors, the information stays the same: the interaction is not significant; the two factors are significant no matter whether the other factor is included or not included in the model. So the data support the additive model.
anova(lm(1/time ~ poison*treat, newrats))
## Analysis of Variance Table
##
## Response: 1/time
## Df Sum Sq Mean Sq F value Pr(>F)
## poison 2 36.672 18.3358 81.0799 7.276e-14 ***
## treat 3 18.567 6.1889 27.3670 2.706e-09 ***
## poison:treat 6 1.980 0.3300 1.4592 0.2207
## Residuals 35 7.915 0.2261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(1/time ~ treat*poison, newrats))
## Analysis of Variance Table
##
## Response: 1/time
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 3 20.136 6.7121 29.6807 9.986e-10 ***
## poison 2 35.102 17.5510 77.6094 1.362e-13 ***
## treat:poison 6 1.980 0.3300 1.4592 0.2207
## Residuals 35 7.915 0.2261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The experiment had litters of rats born to one mother but raised by another. The row factor is the genotype of the litter and the column factor is the genotype of the foster mother. There are n=61 litters, and the y’s are the average weight of the litters in grams at 28 days. The design is unbalanced.
Weight=c(61.5, 68.2, 64,65,59.7, 55, 42, 60.2, 52.5, 61.8,
49.5, 52.7, 42, 54, 61, 48.2, 39.6, 60.3, 51.7, 49.3, 48, 50.8, 64.7, 61.7, 64,
62, 56.5, 59,47.2, 53, 51.3, 40.5, 37, 36.3, 68, 56.3, 69.8, 67, 39.7, 46, 61.3,
55.3, 55.7, 50, 43.8, 54.5, 59, 57.4, 54, 47, 59.5, 52.8, 56, 45.2, 57, 61.4,
44.8, 51.5, 53, 42, 54);
n=matrix(c(5,3,4,5,4,5,4,2,3,3,5,3,4,3,3,5),4,4);
n=t(n);
n
## [,1] [,2] [,3] [,4]
## [1,] 5 3 4 5
## [2,] 4 5 4 2
## [3,] 3 3 5 3
## [4,] 4 3 3 5
Foster=c(); Litter=c();
for(i in 1:4){
for (j in 1:4){
Foster=c(Foster, rep(j, n[i,j]));
Litter=c(Litter, rep(i, n[i,j]));
}
}
rats2 = data.frame(Weight = Weight,
Litter=as.factor(Litter),
Foster=as.factor(Foster))
ggplot(rats2, aes(x=Litter, y=Weight)) +
geom_point() +
stat_summary(fun.y="mean", geom="line", aes(group=Foster, linetype=Foster))+
theme(legend.position = "top", legend.direction = "horizontal")
ggplot(rats2, aes(x=Foster, y=Weight)) +
geom_point() +
stat_summary(fun.y="mean", geom="line", aes(group=Litter, linetype=Litter))+
theme(legend.position = "top", legend.direction = "horizontal")
Although the ANOVA output changes with the order of the two factors, the information stays the same: only the Foster factor is significant, so pick the one-way ANOVA model with the Foster effect.
anova(lm(Weight ~ Litter * Foster, rats2))
## Analysis of Variance Table
##
## Response: Weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Litter 3 60.16 20.052 0.3697 0.775221
## Foster 3 775.08 258.360 4.7632 0.005736 **
## Litter:Foster 9 824.07 91.564 1.6881 0.120053
## Residuals 45 2440.82 54.240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(Weight ~ Foster * Litter, rats2))
## Analysis of Variance Table
##
## Response: Weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Foster 3 771.61 257.202 4.7419 0.005869 **
## Litter 3 63.63 21.211 0.3911 0.760004
## Foster:Litter 9 824.07 91.564 1.6881 0.120053
## Residuals 45 2440.82 54.240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1