##################################
# Superbowl and Derby Data
##################################
#
# Exploratory script: downloads two small course datasets, prints them,
# and draws histograms, a scatter plot and box plots.
#
# Columns are accessed explicitly through `with()` / `$` rather than
# `attach()`, so nothing is placed on the search path and a column can
# never be silently masked by a same-named object in the workspace.

####################
## Superbowl
####################

# import superbowl data
mydata1 <- read.table(
  "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/superbowl1.txt",
  header = TRUE
)

# look at data
head(mydata1)
tail(mydata1)
mydata1

# see the distribution of outcome and spread through histograms
# (`with()` keeps the default titles/axis labels as "Outcome" / "Spread")
with(mydata1, hist(Outcome))
with(mydata1, hist(Spread))

# we can also calculate the mean and standard deviation
mean(mydata1$Outcome)
sd(mydata1$Outcome)
mean(mydata1$Spread)
sd(mydata1$Spread)

# plot Spread vs Outcome
with(mydata1, plot(Spread, Outcome))
# add a 45 degree reference line through the origin (Outcome = Spread);
# intercept 0, slope 1
abline(a = 0, b = 1)

# correlation
with(mydata1, cor(Spread, Outcome))

# Compare boxplot
boxplot(
  mydata1$Spread, mydata1$Outcome,
  horizontal = TRUE,
  names = c("spread", "outcome"),
  col = c("red", "yellow"),
  main = "Superbowl"
)

######################
## Kentucky Derby
######################

# import derby data; `mydata2` must exist before any of the code below runs
mydata2 <- read.csv(
  "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/Kentucky_Derby_2014.csv",
  header = TRUE
)

# alternative: import data from web using Rstudio, then
# mydata2 <- Kentucky_Derby_2014

# look at data
head(mydata2)
tail(mydata2)

# plot a histogram of speedmph
with(mydata2, hist(speedmph, col = "blue"))
# finer bins
with(mydata2, hist(speedmph, breaks = 10, col = "red"))
with(mydata2, hist(timeinsec, breaks = 10, col = "purple"))

# to find the left tail observation (all rows tied for the lowest speed)
k1 <- which(mydata2$speedmph == min(mydata2$speedmph))
mydata2[k1, ]

# to find the best horse (all rows tied for the highest speed)
k2 <- which(mydata2$speedmph == max(mydata2$speedmph))
mydata2[k2, ]

# working directory
# setwd("~/....")

############################################