# A quick R introduction
#
# Helpful Sites:
#   R reference cards (4 pages) (http://www.webpages.uidaho.edu/~stevel/565/R/R-refcard.pdf)
#   Quick R (http://statmethods.net/)
#   "R for Beginners" by Emmanuel Paradis (72 pages) (http://www.webpages.uidaho.edu/~stevel/565/R/R.for.beginners.pdf)
#   "An Introduction to R" by Venables and Smith (93 pages) (http://www.webpages.uidaho.edu/~stevel/565/R/R-intro.pdf)
#   Other R 'official' Manuals (http://cran.r-project.org/manuals.html)
#   Other Contributed Documentation from CRAN (http://cran.r-project.org/other-docs.html)
#   R bloggers (http://www.r-bloggers.com/)
#
# ------------------------------------------------------------------------------------------------------------
# starting R:
# ------------------------------------------------------------------------------------------------------------
#
# Standard R functions (built-in R objects/commands; case sensitive)
#   data()
#   help() or ?name
#   getwd()  dir()  setwd()
#   search()  attach()  detach()  library()
#   ls()     lists the objects in the workspace (the first entry of the search path)
#   str()  ls.str()  attributes()  summary()
#   history()  save.image()  savehistory()
#   c()  vector()  length()  matrix()  array()  dim()  data.frame()  list()  names()
#
# Example
is.vector(Titanic)
is.matrix(Titanic)
is.array(Titanic)
is.character(Titanic)
is.data.frame(Titanic)
is.list(Titanic)
is.array(Titanic)

# arithmetic (# starts a comment line in R)
1:7
# see a list of functions at: http://www.statmethods.net/management/functions.html

# matrix[row,column] for subsetting a data set in the form of a matrix,
# where row (column) could be a number (e.g., 7), a range (e.g., c(2:7,10,12)),
# or a logical statement (e.g., variable_name>3.14)
#
#   rm()
#
# ending an R session: keep the workspace or not?
# exercise: start R, enter some small datasets, practice using elementary functions

# ------------------------------------------------------------------------------------------------------------
# reading data:
# ------------------------------------------------------------------------------------------------------------

# --------------- type variables first, then join to create a data set ----------------
cgpa <- c(
  2.70, 2.66, 2.61, 2.23, 2.04, 3.19, 3.35, 2.80, 2.33, 3.61,
  2.60, 2.44, 2.44, 2.83, 2.64, 2.40, 3.12, 2.73, 2.66, 2.22,
  2.31, 2.33, 2.68, 3.30, 2.54, 3.27, 3.48, 2.43, 2.81, 2.90
)
salary <- c(
  29.29, 28.79, 28.98, 24.07, 22.57, 31.37, 31.73, 27.67, 26.88, 29.34,
  28.46, 26.44, 26.75, 30.05, 28.58, 27.16, 30.16, 29.80, 27.83, 26.63,
  25.97, 26.58, 29.82, 31.99, 29.24, 31.45, 30.66, 27.61, 31.85, 29.73
)
CollegeData <- data.frame(cgpa, salary)
# recycling rules ...
rm(cgpa, salary)
CollegeData

# ---------------------- type a dataset in directly as a table ------------------------
# The values are handed to scan() through `text =` so this also works when the
# file is sourced; a bare scan() waits for console input, and the data rows
# below it would then be parsed as (invalid) R code.
data <- matrix(
  scan(text = "
    1  6.08
    1 22.29
    1  7.51
    1 34.36
    1 23.68
    2 30.45
    2 22.71
    2 44.52
    2 31.47
    2 36.81
    3 32.04
    3 28.03
    3 32.74
    3 23.84
    3 29.64
  "),
  ncol = 2, byrow = TRUE
)
data
tab312 <- data.frame(data)
colnames(tab312) <- c("treatment", "y")
rm(data)
tab312

# ------------------- from a file or the web ------------------------------------------
ls()
# NOTE(review): the first read uses a machine-specific local path; the second
# reads what looks like the same file from the web and overwrites cereals1.
cereals1 <- read.table("c:/cjw/stat550/cereals.txt", header = TRUE)
cereals1 <- read.table("http://www.webpages.uidaho.edu/~chrisw/stat550live/cereals.txt", header = TRUE)
student1 <- read.csv("http://www.webpages.uidaho.edu/~chrisw/stat401/student2.csv", na.strings = ".", header = TRUE)

names(cereals1)
dim(cereals1)
head(cereals1)
tail(cereals1)
cereals1[, 2]
cereals1[1, ]
cereals1[1, 3]

student1
student1[-c(34), ]

table(cereals1$type)
table(cereals1$type, cereals1$manufacturer)
hist(cereals1$calories)
?hist
summary(cereals1)

# attach() puts the columns of cereals1 on the search path so that they can be
# referred to by bare name (fat, sodium, rating below).
attach(cereals1)
plot(fat, sodium)
plot(density(rating))

# for excel use the gdata package ...
# from "http://data.princeton.edu/R/readingData.html"
library(gdata)
ffdata <- read.xls("http://www.math.smith.edu/r/data/help.xlsx", sheet = 1)

# exercise: take the iris data or your own data, read it in, use names,
# examine the raw data, and use summary, table, plot

# ------------------------------------------------------------------------------------------------------------
# manipulating data:
# ------------------------------------------------------------------------------------------------------------

# Columns are referenced through cereals1$... so these lines do not depend on
# cereals1 having been attach()ed earlier.
cereals1$sodium2 <- cereals1$sodium^2
sqrt(cereals1$sodium)
log(cereals1$sodium)

cereals1[1:6, 1:7]
cereals1[1:6, c("cereal", "manufacturer", "type", "calories")]

table(cereals1$manufacturer)
QuakerCereals <- cereals1[cereals1$manufacturer == "Q", ]

summary(cereals1$sugars)
HighSugarCereals <- cereals1[cereals1$sugars >= 7, ]
HighSugarCereals[1:15, 1:6]

# summary() returns a named object; its pieces can be inspected and extracted.
summary(cereals1$sugars)
names(summary(cereals1$sugars))
attributes(summary(cereals1$sugars))
attributes(cereals1)
length(summary(cereals1$sugars))
summary(cereals1$sugars)[3]

# -------------- sorting and merging ---------------------------------------
cereals1[order(cereals1$sugars), 1:5]
cereals1[order(cereals1$sugars), c(1:5, 10)]
# NOTE(review): column 10 is addressed by position; presumably it is the
# sugars column shown in the sorted listing above -- confirm against the data.
cereals1[cereals1$cereal == "Quaker_Oatmeal", 10] <- 0

table(cereals1$manufacturer)
cereals1[cereals1$manufacturer == "A", ]

compnames <- read.table("c:/cjw/rdata/CerealCoNames1.txt", header = TRUE)
compnames
cereals2 <- merge(cereals1, compnames, by = "manufacturer")
names(cereals2)
cereals2[1:10, c(1:5, 16, 17)]

# ------------------ function example -----------------------------
cgpa <- c(
  2.70, 2.66, 2.61, 2.23, 2.04, 3.19, 3.35, 2.80, 2.33, 3.61,
  2.60, 2.44, 2.44, 2.83, 2.64, 2.40, 3.12, 2.73, 2.66, 2.22,
  2.31, 2.33, 2.68, 3.30, 2.54, 3.27, 3.48, 2.43, 2.81, 2.90
)
salary <- c(
  29.29, 28.79, 28.98, 24.07, 22.57, 31.37, 31.73, 27.67, 26.88, 29.34,
  28.46, 26.44, 26.75, 30.05, 28.58, 27.16, 30.16, 29.80, 27.83, 26.63,
  25.97, 26.58, 29.82, 31.99, 29.24, 31.45, 30.66, 27.61, 31.85, 29.73
)
CollegeData <- data.frame(cgpa, salary)
rm(cgpa, salary)

#' Fit a simple linear regression and draw diagnostic plots
#'
#' Fits `lm(y ~ x)` and draws, on a 2 x 2 plot layout: a scatterplot of `y`
#' against `x` with the fitted line and a lowess smooth, a plot of residuals
#' against fitted values, and a normal Q-Q plot of the residuals.
#'
#' The plot layout that was active before the call is restored on exit,
#' including when one of the fitting or plotting steps fails.
#'
#' @param x Predictor values, passed to `lm()`, `plot()` and `lowess()`.
#' @param y Response values, paired with `x`.
#' @return The fitted `lm` object, invisibly.
regandplots <- function(x, y) {
  regfit <- lm(y ~ x)

  # par() returns the previous settings, so the caller's layout can be put
  # back instead of being forced to one plot per page.
  old_par <- par(mfrow = c(2, 2)) # makes plots appear 4 per page
  on.exit(par(old_par), add = TRUE)

  plot(x, y)
  abline(regfit)
  lines(lowess(x, y))

  plot(fitted(regfit), resid(regfit))

  qqnorm(resid(regfit))
  qqline(resid(regfit))

  invisible(regfit)
}

regandplots(CollegeData$cgpa, CollegeData$salary)

# exercise: take the iris data or your own data, perform some transformations,
# create some subsets, define some new data objects, and extract objects from
# functions like table

# ------------------------------------------------------------------------------------------------------------
# graphical displays:
# ------------------------------------------------------------------------------------------------------------

data(Titanic)
ftable(Titanic)
Titanic1 <- margin.table(Titanic, 1)
barplot(Titanic1)
barplot(Titanic1, main = "Individuals on the Titanic")
pie(Titanic1, main = "Individuals on the Titanic")

Titanic2 <- margin.table(Titanic, c(4, 1))
barplot(Titanic2, legend.text = TRUE, main = "Survival on the Titanic, By Class")
barplot(Titanic2, ylim = c(0, 1100), legend.text = TRUE, main = "Survival on the Titanic, By Class")
barplot(Titanic2, beside = TRUE, ylim = c(0, 800), legend.text = TRUE, main = "Survival on the Titanic, By Class")

data(iris)
plength <- iris[, 3]
species <- iris[, 5]

stripchart(plength)
stripchart(plength, "jitter")
stripchart(plength, "stack")
stripchart(plength ~ species, method = "stack")

boxplot(plength)
boxplot(plength ~ species)

hist(plength)
hist(plength, breaks = "Scott", freq = FALSE)
# Named bin_breaks rather than `t` so that base::t() is not masked.
bin_breaks <- seq(0.5, 7, by = 0.5)
hist(plength, breaks = bin_breaks)
hist(plength, breaks = bin_breaks + .25)

data(state)
dimnames(state.x77)
illiteracy <- state.x77[, 3]
murder <- state.x77[, 5]

plot(illiteracy, murder)
plot(Murder ~ Illiteracy, data = state.x77)
plot(illiteracy, murder, col = "red", pch = 16, xlim = c(0.2, 3))
text(illiteracy, murder, labels = state.name)

# unclass() exposes the integer codes behind the factor; they are used below
# as plotting symbols and colours.
state.region
unclass(state.region)
plot(illiteracy, murder, pch = unclass(state.region), xlim = c(0.2, 3))
plot(illiteracy, murder, col = unclass(state.region), xlim = c(0.2, 3), pch = 16)
# Scatterplot with both colour and plotting symbol taken from the integer
# codes of state.region, plus a matching legend.
plot(illiteracy, murder,
  col = unclass(state.region), pch = unclass(state.region),
  xlim = c(0.2, 3), main = "Murder vs. Illiteracy Rates - U.S. States"
)
legend("bottomright", levels(state.region), pch = 1:4, col = 1:4)
# identify() is interactive: click points on the plot to label them.
identify(illiteracy, murder, state.name)

library(lattice)
data(melanoma)
plot(incidence ~ year, data = melanoma, type = "l")
plot(incidence ~ year, data = melanoma, type = "b")
plot(incidence ~ year,
  data = melanoma, type = "o",
  main = "Melanoma Incidence by Year", ylab = "melanoma incidence"
)

frost <- state.x77[, 7]

library(scatterplot3d)
scatterplot3d(cbind(illiteracy, murder, frost))
scatterplot3d(cbind(illiteracy, murder, frost), type = "h", highlight.3d = TRUE)

library(rgl)
plot3d(illiteracy, frost, murder)
plot3d(illiteracy, frost, murder, type = "s")
plot3d(illiteracy, frost, murder, type = "s", size = .25)
plot3d(illiteracy, frost, murder, type = "s", size = .25, col = "red")
plot3d(illiteracy, frost, murder,
  type = "s", size = .25,
  col = c("red", "yellow", "blue", "green")[unclass(state.region)]
)
text3d(illiteracy, frost, murder + .25, text = state.name)
# "filename" is a placeholder for the output file name.
rgl.snapshot("filename")

pairs(iris[, 1:4])
pairs(iris[, 1:4], pch = 16, col = unclass(species))

state <- state.x77[, 2:7]
pairs(state)
pairs(state, pch = 16, col = unclass(state.region))

stars(state, key.loc = c(15, 1.5))
stars(state, key.loc = c(15, 1.5), col.stars = unclass(state.region) + 1)

library(MASS)
parcoord(state, col = unclass(state.region))
legend("topleft", levels(state.region), lty = 1, col = 1:4)
parcoord(iris[, 1:4], col = c(1, 2, 4)[unclass(species)])
legend("topleft", levels(species), lty = 1, col = c(1, 2, 4))

# exercise: create several graphical displays using either data sets we have
# used or your own data