#--------------------------------- # Derby Data #--------------------------------- mydata = read.csv("http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/Kentucky_Derby_2018.csv",header=T) head(mydata) tail(mydata) attach(mydata) ts1 <- ts(speedmph, start=1875, end=2016, frequency=1) plot(ts1,xlab="year",ylab="speed mph",main="Kentucky Derby",col=2, lty=2, lwd=2) # we can see obvious time trend in the time series plot # here is a simple way to extract the time trend # after removing the time trend, we can talk about the ARMA model model = lm(speedmph~year_num) speed_tt = model$coef[1] + model$res ts2 <- ts(speed_tt, start=1875, end=2016, frequency=1) plot(ts2,xlab="year",ylab="speed mph",main="time trend removed",col=2, lty=2, lwd=2) # to find the outlier out = which(abs(model$res)>1.645) # label those outliers plot(ts2,xlab="year",ylab="speed mph",main="time trend removed",col=2, lty=2, lwd=2) text(x=out+1874,y=ts2[out],labels=winner[out]) # remove the outliers ts3 = ts2[-out] winner2 = winner[-out] years = c(1:142)[-out]+1874 # plot the acf and pacf plots # an AR model might not be appropriate # but we can consider an MA model # acf(ts3) # pacf(ts3) model3 = arima(ts3,c(0,0,1)) out2 = which(abs(model3$res)>1) winner2[out2] # MA(1) prediction + the time trend ts4 = ts3-model3$res + c(1:142)[-out]*model$coef[2] plot(years,ts4,type="l",ylim=c(32.5,37.5),main="MA(1) predictions, outliers removed", xlab="year",ylab="speed mph",col=2, lty=2, lwd=2) lines(years,ts1[-out],type="l",col=3, lty=3, lwd=3) text(x=years[out2],y=ts1[-out][out2],labels=winner2[out2])