##################################
#  Superbowl and Derby Data
##################################

####################
## Superbowl
####################

# Import the Super Bowl data from the course website; the first line of the
# file holds the column names.
mydata1 <- read.table(
  "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/superbowl1.txt",
  header = TRUE
)

# Look at the data: first rows, last rows, then the full table.
head(mydata1)
tail(mydata1)
mydata1

# Attach so the columns (Outcome, Spread) can be referenced by bare name.
# NOTE(review): attach() is kept only because later code may rely on the bare
# column names; prefer with(mydata1, ...) or mydata1$col in new code.
attach(mydata1)

# Distribution of the outcome and the spread, shown as histograms.
hist(Outcome)
hist(Spread)

# Mean and standard deviation of each variable.
mean(Outcome)
sd(Outcome)
mean(Spread)
sd(Spread)

# Scatter plot of Outcome (y-axis) against Spread (x-axis).
plot(Spread, Outcome)

# Add the 45-degree reference line y = x. abline(a, b) takes the intercept
# first and the slope second, so the identity line is abline(0, 1); the
# previous abline(1, 1) drew y = 1 + x instead.
abline(0, 1)

# Correlation between Spread and Outcome.
cor(Spread, Outcome)

# Side-by-side horizontal boxplots to compare the two distributions.
boxplot(
  Spread, Outcome,
  horizontal = TRUE,
  names = c("spread", "outcome"),
  col = c("red", "yellow"),
  main = "Superbowl"
)

######################
## Kentucky Derby
######################

# Load the Kentucky Derby data. Previously both ways of defining mydata2 were
# commented out, so attach(mydata2) failed in a fresh session. Resolution
# order:
#   1. an existing mydata2 object is used as-is;
#   2. otherwise a Kentucky_Derby_2014 object (e.g. created by the RStudio
#      "Import Dataset" dialog) is used;
#   3. otherwise the CSV is read directly from the course website.
if (!exists("mydata2")) {
  if (exists("Kentucky_Derby_2014")) {
    mydata2 <- Kentucky_Derby_2014
  } else {
    mydata2 <- read.csv(
      "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/Kentucky_Derby_2014.csv",
      header = TRUE
    )
  }
}

# Attach so the columns (speedmph, timeinsec) can be referenced by bare name.
# NOTE(review): attach() is kept only because later code may rely on the bare
# column names; prefer with(mydata2, ...) or mydata2$col in new code.
attach(mydata2)

head(mydata2)
tail(mydata2)

# Histogram of speedmph with the default binning.
hist(speedmph, col = "blue")

# Finer bins (breaks = 10 is a suggestion to hist(), not an exact bin count).
hist(speedmph, breaks = 10, col = "red")
hist(timeinsec, breaks = 10, col = "purple")

# Left-tail observation: row(s) with the lowest speed. na.rm = TRUE keeps a
# missing speed from turning the minimum into NA and emptying the result.
k1 <- which(speedmph == min(speedmph, na.rm = TRUE))
mydata2[k1, ]

# Best horse: row(s) with the highest speed.
k2 <- which(speedmph == max(speedmph, na.rm = TRUE))
mydata2[k2, ]

# working directory: uncomment and edit the path below if local files are needed
# setwd("~/....") 





############################################