CanPres1 <- read.table("c:/temp/prestige.txt",header=T)
names(CanPres1)

 # delete four cases with missing values for the type variable
CanPres1 <- CanPres1[!is.na(CanPres1$type),]


 # defining dummy variables
CanPres1$prof <- as.numeric(CanPres1$type == "prof")
CanPres1$wc <- as.numeric(CanPres1$type == "wc")

 # defining crossproduct variables used to test for interaction
CanPres1$profeduc    <- CanPres1$prof*CanPres1$education
CanPres1$profincome  <-  CanPres1$prof*CanPres1$income
CanPres1$wceduc    <- CanPres1$wc*CanPres1$education
CanPres1$wcincome  <-  CanPres1$wc*CanPres1$income

 # a numeric variable for occupation group
CanPres1$typegroup  <-  CanPres1$prof + 2*CanPres1$wc

 
 # regression sums of squares from table 7.1

# I,E,T, I x T, E x T 
sum(anova(lm(prestige ~ income +education +as.factor(typegroup) +income:as.factor(typegroup)
              +education:as.factor(typegroup),data=CanPres1))$"Sum Sq"[1:5])

# I,E,T, I x T, E x T can also be written without the as.factor() function as
sum(anova(lm(prestige ~ income +education +prof +wc +profeduc +profincome
              +wceduc +wcincome,data=CanPres1))$"Sum Sq"[1:8])


# I,E,T, I x T 
sum(anova(lm(prestige ~ income +education +as.factor(typegroup) +income:as.factor(typegroup),
              data=CanPres1))$"Sum Sq"[1:4])

# I,E,T, E x T 
sum(anova(lm(prestige ~ income +education +as.factor(typegroup) +education:as.factor(typegroup),
              data=CanPres1))$"Sum Sq"[1:4])

# I,E,T 
sum(anova(lm(prestige ~ income +education +as.factor(typegroup),
              data=CanPres1))$"Sum Sq"[1:3])

# I,E  
sum(anova(lm(prestige ~ income +education, data=CanPres1))$"Sum Sq"[1:2])


# I,T, I x T
sum(anova(lm(prestige ~ income +as.factor(typegroup) +income:as.factor(typegroup),
              data=CanPres1))$"Sum Sq"[1:3])

# E,T, E x T
sum(anova(lm(prestige ~ education +as.factor(typegroup) 
              +education:as.factor(typegroup),data=CanPres1))$"Sum Sq"[1:3])



 # conducting tests
 # Note that the anova function calculates Type I SS (Sequential), so does not match Table 7.2 in the text
 # Type I SS do not obey the principle of marginality

anova(lm(prestige ~ income +education +as.factor(typegroup) +income:as.factor(typegroup)
              +education:as.factor(typegroup),data=CanPres1))


 # the car package contains a function 'Anova' that can calculate Type II or Type III SS
 # it calculates Type II SS by default

library(car)

 # this produces the same results as in Table 7.2
Anova(lm(prestige ~ income +education +as.factor(typegroup) +income:as.factor(typegroup)
              +education:as.factor(typegroup),data=CanPres1),type="II")

Anova(lm(prestige ~ income +education +as.factor(typegroup) +income:as.factor(typegroup)
              +education:as.factor(typegroup),data=CanPres1),type="III")