# Regression diagnostics for the prestige data read from
# c:/temp/prestige.txt: fits prestige on education, income and women,
# then draws a series of residual / leverage / influence plots.

# ---- Data and model --------------------------------------------------------

# The file is expected to carry a header row with the column names.
CanPrestige1 <- read.table("c:/temp/prestige.txt", header = TRUE)
names(CanPrestige1)

CanPrestige1.lm1 <- lm(
  prestige ~ education + income + women,
  data = CanPrestige1
)
summary(CanPrestige1.lm1)
anova(CanPrestige1.lm1)

# ---- Built-in lm diagnostic plots -----------------------------------------

# plot() on an lm object automatically generates several useful plots;
# show them on a 2 x 2 grid and then restore the previous layout.
old_par <- par(mfrow = c(2, 2))
plot(CanPrestige1.lm1)
par(old_par)

# ---- Studentized residuals vs. leverage ------------------------------------

# Compute the quantities once; both plots below use them.
hat_vals <- hatvalues(CanPrestige1.lm1)
stud_res <- rstudent(CanPrestige1.lm1)

# Leverage reference line at 2p/n (p = number of coefficients,
# n = number of residuals).
n_coef <- length(CanPrestige1.lm1$coefficients)
n_obs <- length(CanPrestige1.lm1$residuals)
leverage_cutoff <- 2 * n_coef / n_obs

# Studentized residuals vs. hat values, with reference lines to help
# identify points that have high leverage and are outliers.
plot(
  hat_vals, stud_res,
  xlim = c(0, .4),
  main = "Studentized Residuals vs. Hat values"
)
abline(h = c(-2, 2))
abline(v = leverage_cutoff)

# The same plot, but with every observation labelled by its row name:
# too busy for most points, but it helps with the extreme points.
plot(
  hat_vals, stud_res,
  xlim = c(0, .4),
  main = "Studentized Residuals vs. Hat values"
)
abline(h = c(-2, 2))
abline(v = leverage_cutoff)
text(
  hat_vals, stud_res,
  labels = rownames(CanPrestige1.lm1$model),
  pos = 4, cex = .5
)

# ---- Residuals against covariates ------------------------------------------

model_resid <- resid(CanPrestige1.lm1)

old_par <- par(mfrow = c(2, 2))
plot(
  CanPrestige1$education, model_resid,
  xlab = "Education Values", ylab = "Residuals",
  main = "Residual by Education plot"
)
plot(
  CanPrestige1$income, model_resid,
  xlab = "Income Values", ylab = "Residuals",
  main = "Residual by Income plot"
)
plot(
  CanPrestige1$women, model_resid,
  xlab = "Percent Women", ylab = "Residuals",
  main = "Residual by Gender Composition plot"
)
par(old_par)

# ---- Influence measures ----------------------------------------------------

# Index plot of Cook's distances. cooks.distance() from stats replaces
# car's cookd(), which is defunct in current releases of car.
plot(cooks.distance(CanPrestige1.lm1))

# Additional plot from the car package by the author of our text. The
# package is loaded here rather than at the top so that the base-R
# diagnostics above still run when car is not installed.
library(car)

# Influence plot of the fitted model.
# NOTE(review): older car versions let you identify observations
# interactively here; current versions label noteworthy points
# automatically, and id = list(method = "identify") requests interactive
# identification -- confirm against the installed car version.
influencePlot(CanPrestige1.lm1)