##################################
#  Problem 1 Superbowl and Derby Data
##################################

####################
## Superbowl
####################

# Import the Superbowl data (text file with a header row).
mydata1 <- read.table(
  "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/superbowl1.txt",
  header = TRUE
)

# Look at the data: first rows, last rows, then the whole table.
head(mydata1)
tail(mydata1)
mydata1

# Pull the two columns used below into vectors of the same name instead of
# attach()-ing the data frame: attached columns are silently masked by any
# global variable with the same name and linger on the search path.
stopifnot(all(c("Outcome", "Spread") %in% names(mydata1)))
Outcome <- mydata1$Outcome
Spread <- mydata1$Spread

# Distribution of outcome and spread through histograms.
hist(Outcome)
hist(Spread)

# Mean and standard deviation of each variable.
mean(Outcome); sd(Outcome)
mean(Spread); sd(Spread)

# Scatter plot of Outcome against Spread.
plot(Spread, Outcome)

# Add the 45-degree line Outcome = Spread. abline(a, b) takes the intercept
# first and the slope second, so the line through the origin is abline(0, 1);
# abline(1, 1) would be shifted up by one point.
abline(0, 1)

# Correlation between spread and outcome.
cor(Spread, Outcome)

# Compare the two distributions with horizontal boxplots.
boxplot(Spread, Outcome, horizontal = TRUE, names = c("spread", "outcome"),
        col = c("red", "yellow"), main = "Superbowl")


#---------------------------------
# Derby Data
#---------------------------------

# Import the Kentucky Derby data (CSV with a header row) and inspect it.
mydata <- read.csv(
  "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/Kentucky_Derby_2018.csv",
  header = TRUE
)
head(mydata)
tail(mydata)

# Pull the columns used below into vectors of the same name instead of
# attach()-ing the data frame, so they cannot be masked by other globals
# and the search path is left untouched.
stopifnot(all(c("speedmph", "timeinsec") %in% names(mydata)))
speedmph <- mydata$speedmph
timeinsec <- mydata$timeinsec

# Annual time series of winning speed.
# NOTE(review): assumes one row per year, ordered from 1875 to 2018; ts()
# recycles or truncates the data if the row count differs -- confirm
# against the CSV.
ts1 <- ts(speedmph, start = 1875, end = 2018, frequency = 1)
plot(ts1, xlab = "year", ylab = "speed mph", main = "Kentucky Derby",
     col = 2, lty = 2, lwd = 2)

# Histogram of speedmph with default bins.
hist(speedmph, col = "blue")

# Finer bins for speed and for finishing time.
hist(speedmph, breaks = 10, col = "red")
hist(timeinsec, breaks = 10, col = "purple")

# Left-tail observation: row(s) with the slowest speed (ties are all kept).
k1 <- which(speedmph == min(speedmph))
mydata[k1, ]

# Best horse: row(s) with the fastest speed (ties are all kept).
k2 <- which(speedmph == max(speedmph))
mydata[k2, ]


##################################
#  Problem 2 
##################################


# Install quantmod and moments only when they are missing, then load them.
# quantmod provides getSymbols() (Yahoo Finance download); moments provides
# skewness() and kurtosis().
for (pkg in c("quantmod", "moments")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("moments")
library("quantmod")

# Download daily BRK-A data from 1990-01-01 up to the latest available date.
# The result is fetched back by its string name because `BRK-A` is not a
# syntactic R identifier.
getSymbols("BRK-A", from = "1990-01-01")

BRKA <- get("BRK-A")
# Keep only the 4th column (presumably the closing price -- verify against
# the column names printed by head() below).
BRKA <- BRKA[, 4]

# Take a look at the data.
head(BRKA)

plot(BRKA, type = "l", col = 20, main = "BRKA Share Price",
     ylab = "Price", xlab = "Time", bty = "n")

# Simple returns: ret[t] = (y[t+1] - y[t]) / y[t], computed on the whole
# vector at once instead of an element-by-element loop.
N <- length(BRKA)
y <- as.vector(BRKA)
ret <- diff(y) / y[-N]

# Summaries of ret for BRK-A.
options(digits = 3) # number of significant digits printed from here on
summary(ret)
sd(ret)
skewness(ret)
kurtosis(ret)

par(mfrow = c(1, 2)) # two plots side by side in a 1 x 2 layout

# Time series plot of price.
# To save the plot, click "Export" and "Save as image".
# Build a ts object so the x-axis is in (approximate) calendar years, using
# 252 trading days per year. No `end` is given: the download above is
# open-ended, and a hard-coded end date would make ts() silently truncate
# the series so the plot no longer matched the data used for the returns.
y_ts <- ts(y, start = c(1990, 1), frequency = 252)
ts.plot(y_ts, main = "BRK-A price time series", ylab = "price", xlab = "day")
# Histogram of returns.
hist(ret, breaks = 50, main = "BRK-A daily returns")

# Plots to show serial correlation in 1st and 2nd moments.
# To save the plot, click "Export" and "Save as image".
par(mfrow = c(1, 2))
acf(ret, lag.max = 10, main = "serial correlation in 1st moment")
acf(ret^2, lag.max = 10, main = "serial correlation in 2nd moment")

dev.off() # close the device, which also discards the 1 x 2 layout


############################################

##################################
#  Problem 5
##################################
# Annual returns in percent for the eight years on record, converted to
# decimal fractions.
SP.Index <- c(-37, 26.6, 15.1, 2.1, 16, 32.3, 13.6, 1.4)
Hedge.Funds <- c(-23.9, 15.9, 8.5, -1.9, 6.5, 11.8, 5.6, 1.7)
SP.Index <- SP.Index / 100
Hedge.Funds <- Hedge.Funds / 100

# Mean and standard deviation of the yearly return difference.
mean(SP.Index - Hedge.Funds)
sd(SP.Index - Hedge.Funds)


# Paired t test of the yearly returns.
t.test(SP.Index, Hedge.Funds, paired = TRUE)


# Who will win?
# Simulate the two remaining years and compare the final cumulative returns.

m1 <- mean(SP.Index)
s1 <- sd(SP.Index)
m2 <- mean(Hedge.Funds)
s2 <- sd(Hedge.Funds)
N <- 1000
# Final cumulative growth factor per simulation, preallocated rather than
# grown with c() inside the loop.
cum1 <- numeric(N) # SP.Index
cum2 <- numeric(N) # Hedge.Funds
set.seed(410)
for (i in seq_len(N)) {
  # Start from the cumulative return to date and append simulated normal
  # returns for 2016 and 2017. 0.657 matches the compounded SP.Index returns
  # above.
  # NOTE(review): compounding Hedge.Funds above gives roughly 0.20, not
  # 0.219 -- confirm the source of this constant.
  return1 <- c(0.657, rnorm(n = 2, mean = m1, sd = s1))
  # Only the end-of-horizon value decides the bet, so keep the full product
  # rather than every intermediate cumprod() value (the first of which is
  # the fixed 1.657 vs 1.219 and would always count as a win).
  cum1[i] <- prod(1 + return1)

  return2 <- c(0.219, rnorm(n = 2, mean = m2, sd = s2))
  cum2[i] <- prod(1 + return2)
}

# Probability of Buffett (the index) winning: share of simulations in which
# the index ends ahead of the hedge funds.
mean(cum1 > cum2)