#########################################
#
#  41901: Probability and Statistics
#
#########################################

##################################
#
#  Keynes Data
#
##################################

keynes = read.table("keynes.txt", header=T)
attach(keynes)

# Plot the data
plot(Year, Keynes, pch=20, col=34)
plot(Market, Keynes, xlab="Market Return", ylab="Keynes Return", col=20)

# Correlation matrix
cor(cbind(Keynes, Market))

# Fit the least-squares line and superimpose it on the plot
model = lm(Keynes ~ Market)
abline(model)
title("Keynes vs Market")

# Extract the model coefficients
coef(model)
summary(model)

# 4-in-1 residual diagnostics
layout(matrix(c(1,2,3,4), 2, 2))
plot(model, pch=20)

# Calculate excess returns (subtract the risk-free rate) and refit
Keynes = Keynes - Rate
Market = Market - Rate

# Correlation matrix of the excess returns
cor(cbind(Keynes, Market))

modelnew = lm(Keynes ~ Market)
summary(modelnew)

##################################
#
#  Buffett Data
#
##################################

buffett = read.table("buffett.txt", header=T)
attach(buffett)

# Plot the data
plot(Market, Buffett, xlab="Market Return", ylab="Buffett Return", pch=20)

# Correlation matrix
cor(cbind(Buffett, Market))

# Fit the least-squares line and superimpose this line on the plot
model = lm(Buffett ~ Market)
abline(model)
title("Buffett vs Market")

# Extract the model coefficients
coef(model)
summary(model)

# Improvement in fit: residual sd vs. the raw sd of Buffett's returns
sd(model$residuals)
sd(Buffett)

# Remove datapoint 10 (for a sensitivity check; refit with
# lm(Buffett ~ Market, data=buffett_10) to see its influence)
buffett_10 = buffett[-10,]

##################################
#
#  Superbowl Data
#
##################################

superbowl1 = read.table("superbowl1.txt", header=T)
attach(superbowl1)

# Plot the data
plot(Outcome, Spread, pch=20)

# Team names (col=0 makes the points invisible so the labels can be drawn)
par(mfrow=c(1,2))
plot(Outcome, Spread, col=0)
text(Outcome, Spread, labels=Favorite, col=4)
plot(Outcome, Spread, col=0)
text(Outcome, Spread, labels=Underdog, col=6)

# Correlation matrix
cor(cbind(Outcome, Spread))

# Build model
super <- lm(Outcome ~ Spread)
summary(super)

# Graphics: put Spread on the x-axis so the fitted line from lm(Outcome ~ Spread)
# and the 45-degree line (Outcome = Spread) are drawn on the right axes
plot(Spread, Outcome, pch=20, col=4)
abline(super, col=2)
abline(a=0, b=1, col=3)
title("Superbowl Data")

# Residual analysis
qqnorm(rstudent(super), col=20)
abline(a=0, b=1)
hist(rstudent(super), col=11, main="residuals")

#########################
# Logistic Regression
#########################

#setwd("C:/Users/ngp/Documents/41000solns")
NBA = read.csv("NBAspread.csv")
attach(NBA)
n = nrow(NBA)

par(mfrow=c(1,2))
hist(NBA$spread[favwin==1], col=5, main="", xlab="spread")
hist(NBA$spread[favwin==0], add=TRUE, col=6)
legend("topright", legend=c("favwin=1", "favwin=0"), fill=c(5,6), bty="n")
boxplot(NBA$spread ~ NBA$favwin, col=c(6,5), horizontal=TRUE,
        ylab="favwin", xlab="spread")

# Logistic regression with no intercept: P(favwin) = 1/2 when spread = 0
nbareg = glm(favwin ~ spread - 1, family=binomial)
s = seq(0, 30, length=100)
fit = exp(s*nbareg$coef[1])/(1 + exp(s*nbareg$coef[1]))
plot(s, fit, type="l", col=4, lwd=2, ylim=c(0.5,1),
     xlab="spread", ylab="P(favwin)")

# Compare three candidate models by BIC (k = log(n) gives BIC; n = 553 games)
bic = cbind(
  extractAIC(nbareg, k=log(553)),
  extractAIC(glm(favwin ~ spread, family=binomial), k=log(553)),
  extractAIC(glm(favwin ~ spread + favhome, family=binomial), k=log(553)))[2,]
# Approximate posterior model probabilities from BIC
ebic = exp(-.5*(bic - min(bic)))
round(ebic/sum(ebic), 2)

# Predicted win probabilities for this week's spreads
thisweek = c(8,4)
pred = nbareg$coef[1]*thisweek
exp(pred)/(1 + exp(pred))

#########################
# Lasso
#########################

library(genlasso)

n = 20
p = 10
r = 0.999
y = rnorm(n)
X = matrix(rnorm(n * p), nrow = n)
D = matrix(r, nrow = p, ncol = p) + diag(1 - r, p)
fit.genlasso = genlasso::genlasso(y, X, D)
coef(fit.genlasso, lambda=sqrt(n*log(p)))
plot(fit.genlasso)
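## A quick check on the generalized-lasso fit above (a sketch, reusing
## fit.genlasso, y, X, n, p from this example): compare the penalized
## coefficients at the chosen lambda with plain least squares.
beta.gl = coef(fit.genlasso, lambda = sqrt(n*log(p)))$beta
beta.ls = coef(lm(y ~ X - 1))
round(cbind(genlasso = as.numeric(beta.gl), ols = as.numeric(beta.ls)), 3)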
# Lasso with glmnet on simulated data
library(glmnet)

n = 200
p = 50
X = matrix(rnorm(n * p), nrow = n)

# Only the first half of the X variables are used to generate Y
beta_true = c(rnorm(p * 0.5), rep(0, p * 0.5))
Y = X %*% beta_true + rnorm(n)

# Cross-validation; alpha = 1 means lasso shrinkage
cv.fit = cv.glmnet(x = X, y = Y, alpha = 1)
plot(cv.fit)

# Best lambda
cv.fit$lambda.min
fit = glmnet(x = X, y = Y, alpha = 1, lambda = cv.fit$lambda.min)

# Plot the estimated coefficients against the true coefficients
plot(beta_true, as.numeric(fit$beta), pch=20, col="maroon")

#########################
# Asthma Data
#########################

library(glmnet)

# Small made-up data set
age     <- c(4,8,7,12,6,9,10,14,7)
gender  <- c(1,0,1,1,1,0,1,0,0);  gender <- as.factor(gender)
bmi_p   <- c(0.86,0.45,0.99,0.84,0.85,0.67,0.91,0.29,0.88)
m_edu   <- c(0,1,1,2,2,3,2,0,1);  m_edu <- as.factor(m_edu)
p_edu   <- c(0,2,2,2,2,3,2,0,0);  p_edu <- as.factor(p_edu)
f_color <- c("blue", "blue", "yellow", "red", "red",
             "yellow", "yellow", "red", "yellow")
asthma  <- c(1,1,0,1,0,0,0,1,1)
f_color <- as.factor(f_color)

# Build the design matrix (drop the intercept column added by model.matrix)
xfactors <- model.matrix(asthma ~ gender + m_edu + p_edu + f_color)[,-1]
x <- as.matrix(data.frame(age, bmi_p, xfactors))

# alpha = 1 for lasso
glmmod <- glmnet(x, y=as.factor(asthma), alpha=1, family='binomial')

# Plot variable coefficients vs. shrinkage parameter lambda
plot(glmmod, xvar="lambda")
grid()

# Results (note: only 9 observations, so the CV choice of lambda is very noisy)
cv.glmmod <- cv.glmnet(x, y=asthma, alpha=1)
plot(cv.glmmod)
best_lambda <- cv.glmmod$lambda.min
best_lambda

##########################################
#
#  Prostate Data using glmnet
#
##########################################

prostate.data = read.table('http://statweb.stanford.edu/~tibs/ElemStatLearn/datasets/prostate.data')[,-10]

# Rescale the predictors (not the response);
# this appears to be what Tibshirani and colleagues use.
x.model = as.matrix(scale(prostate.data[,1:8]))
lnam = colnames(prostate.data)[-9]

# Lasso
glm.lasso.cv = cv.glmnet(x.model, prostate.data$lpsa)
glm.lasso.cv$lambda.min
coef(glm.lasso.cv, s = glm.lasso.cv$lambda.min)
plot(glm.lasso.cv)
grid()

fit = glm.lasso.cv$glmnet.fit
plot(fit, xvar="lambda")
grid()
xpos = min(log(fit$lambda))
text(rep(xpos,8), fit$beta[, ncol(fit$beta)],
     lnam[nonzeroCoef(fit$beta)], cex = 0.5)

# Ridge
glm.ridge.cv = cv.glmnet(x.model, prostate.data$lpsa, alpha=0)
glm.ridge.cv$lambda.min
coef(glm.ridge.cv, s = glm.ridge.cv$lambda.min)
plot(glm.ridge.cv)
grid()

fit = glm.ridge.cv$glmnet.fit
plot(fit, xvar="lambda")
grid()
xpos = min(log(fit$lambda))
text(rep(xpos,8), fit$beta[, ncol(fit$beta)],
     lnam[nonzeroCoef(fit$beta)], cex = 0.5)
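## Side-by-side look at the prostate coefficients (a sketch reusing the two
## cv.glmnet fits above). "lambda.1se" is the sparser one-standard-error
## choice that cv.glmnet reports alongside lambda.min.
cmp = cbind(
  lasso.min = as.numeric(coef(glm.lasso.cv, s = "lambda.min")),
  lasso.1se = as.numeric(coef(glm.lasso.cv, s = "lambda.1se")),
  ridge.min = as.numeric(coef(glm.ridge.cv, s = "lambda.min")))
rownames(cmp) = rownames(coef(glm.lasso.cv, s = "lambda.min"))
round(cmp, 3)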
####################################################
#
#  Ridge Regression: n=50, p=30 predictors
#
####################################################

set.seed(0)
n = 50
p = 30
x = matrix(rnorm(n*p), nrow=n)

# True coefficients: 10 large, 20 small
bstar = c(runif(10,0.5,1), runif(20,0,0.3))
mu = as.numeric(x%*%bstar)

par(mar=c(4.5,4.5,0.5,0.5))
hist(bstar, breaks=30, col="gray", main="", xlab="True coefficients")

library(MASS)
set.seed(1)
R = 100        # number of simulation repetitions
nlam = 60
lam = seq(0,25,length=nlam)

fit.ls = matrix(0,R,n)
fit.rid = array(0,dim=c(R,nlam,n))
err.ls = numeric(R)
err.rid = matrix(0,R,nlam)

for (i in 1:R) {
  cat(c(i,", "))
  y = mu + rnorm(n)
  ynew = mu + rnorm(n)

  # Least squares
  a = lm(y~x+0)
  bls = coef(a)
  fit.ls[i,] = x%*%bls
  err.ls[i] = mean((ynew-fit.ls[i,])^2)

  # Ridge, over the whole grid of lambdas
  aa = lm.ridge(y~x+0, lambda=lam)
  brid = coef(aa)
  fit.rid[i,,] = brid%*%t(x)
  err.rid[i,] = rowMeans(scale(fit.rid[i,,], center=ynew, scale=F)^2)
}

aveerr.ls = mean(err.ls)
aveerr.rid = colMeans(err.rid)

# Bias-variance decomposition of the fitted values
bias.ls = sum((colMeans(fit.ls)-mu)^2)/n
var.ls = sum(apply(fit.ls,2,var))/n
bias.rid = rowSums(scale(apply(fit.rid,2:3,mean), center=mu, scale=F)^2)/n
var.rid = rowSums(apply(fit.rid,2:3,var))/n

mse.ls = bias.ls + var.ls
mse.rid = bias.rid + var.rid

# Expected prediction error = MSE + noise variance (sigma^2 = 1 here)
prederr.ls = mse.ls + 1
prederr.rid = mse.rid + 1

bias.ls
var.ls
p/n

prederr.ls
aveerr.ls

cbind(prederr.rid, aveerr.rid)

par(mar=c(4.5,4.5,0.5,0.5))
plot(lam, prederr.rid, type="l",
     xlab="Amount of shrinkage", ylab="Prediction error")
abline(h=prederr.ls, lty=2)
text(c(1,24), c(1.48,1.48), c("Low","High"))
legend("topleft", lty=c(2,1),
       legend=c("Linear regression","Ridge regression"))

par(mar=c(4.5,4.5,0.5,0.5))
plot(lam, mse.rid, type="l", ylim=c(0,max(mse.rid)),
     xlab=expression(paste(lambda)), ylab="")
lines(lam, bias.rid, col="red")
lines(lam, var.rid, col="blue")
abline(h=mse.ls, lty=2)
legend("bottomright", lty=c(2,1,1,1),
       legend=c("Linear MSE","Ridge MSE","Ridge Bias^2","Ridge Var"),
       col=c("black","black","red","blue"))

################################################
#
#  Baseball Shrinkage Data: Efron and Morris
#
################################################

bball = read.table("http://www.swarthmore.edu/NatSci/peverso1/Sports%20Data/JamesSteinData/Efron-Morris%20Baseball/EfronMorrisBB.txt",
                   header=TRUE, stringsAsFactors=FALSE)

# James-Stein estimate: shrink each player's early-season average toward the
# grand mean (0.265) with shrinkage weight 0.212 (Efron and Morris)
bball$js = bball$BattingAverage * .212 + .788 * (0.265)

# Blank out a few last names so the labels don't overplot
bball$LastName[!is.na(match(bball$LastName,
  c("Scott","Williams","Rodriguez","Unser","Swaboda","Spencer")))] = ""

# Line plot connecting each player's three values
a = matrix(rep(1:3, nrow(bball)), 3, nrow(bball))
b = matrix(c(bball$BattingAverage, bball$SeasonAverage, bball$js),
           3, nrow(bball), byrow=TRUE)
matplot(a, b, pch=" ", ylab="predicted average", xaxt="n",
        xlim=c(0.5, 3.1), ylim=c(0.13, 0.42))
matlines(a, b)
text(rep(0.7, nrow(bball)), bball$BattingAverage, bball$LastName, cex=0.6)
text(1, 0.14, "First 45\nat bats", cex=0.5)
text(2, 0.14, "Average\nof remainder", cex=0.5)
text(3, 0.14, "J-S\nestimator", cex=0.5)

################################################
#
#  Bayes Kelly Criterion
#
################################################

# Each example block below overwrites xik, stock1, stock2;
# run exactly one of them before the plotting code further down.

#Data = read.csv("marketmodel.csv",header=T)
Data = read.csv("applesp500.csv", header=T)
attach(Data)
stock1 <- "AAPL"
stock2 <- "SP500"

## Wealth ratios
xik <- Data[,c(stock1,stock2)]
xik <- 1 + xik/100

## Random vs cash example
# x1 <- 0.85+0.33*rbinom(100,1,0.5)
x1 <- 0.6 + 1.0*rbinom(100,1,0.5)
x2 <- rep(1,100)
xik <- data.frame(cbind(x1,x2))
stock1 <- "random"
stock2 <- "cash"
names(xik) <- c("random","cash")

## Keynes example vs. market: lev=3 works, optimal lev=2.5, blow-up at lev=4
Data = read.table("keynes.txt", header=T)
attach(Data)
xik <- Data[,c("Keynes","Market")]
xik <- 1 + xik/100
stock1 <- "Keynes"
stock2 <- "Market"
## Keynes example vs. cash: lev=2.4 blows up; levl=1.0, lev=1.5 work;
## the universal portfolio does better.
Data = read.table("keynes.txt", header=T)
attach(Data)
xik <- Data[,c("Keynes","Rate")]
xik <- 1 + xik/100
stock1 <- "Keynes"
stock2 <- "Rate"

## Parrondo example 1: more volatility to exploit
# p=0.515 gives max wealth 0.5; very sensitive
N1 <- 5000
x1 <- 0.90 + 0.2*rbinom(N1,1,0.52)
x2 <- 0.90 + 0.2*rbinom(N1,1,0.52)
xik <- data.frame(cbind(x1,x2))
stock1 <- "Parrando1"
stock2 <- "Parrando2"
names(xik) <- c("Parrando1","Parrando2")

## Parrondo example 2: Stutzer
# p=0.51 and f=0.5 gives max wealth 0.5; very sensitive
N1 <- 1000
x1 <- 0.975 + 0.05*rbinom(N1,1,0.51)
x2 <- 0.975 + 0.05*rbinom(N1,1,0.51)
xik <- data.frame(cbind(x1,x2))
stock1 <- "Parrando1"
stock2 <- "Parrando2"
names(xik) <- c("Parrando1","Parrando2")

## Parrondo example 3
N1 <- 5000
x1 <- 0.95 + 0.1*rbinom(N1,1,0.507)
x2 <- 0.95 + 0.1*rbinom(N1,1,0.507)
xik <- data.frame(cbind(x1,x2))
stock1 <- "Parrando1"
stock2 <- "Parrando2"
names(xik) <- c("Parrando1","Parrando2")

## Plot setup
nDays <- dim(xik)[1]
Days <- 1:nDays

## Constant-rebalanced portfolio (CRP): cumulative wealth for weights w
crp <- function(x,w) {
  temp = w[1]*x[,1] + w[2]*x[,2]
  cumprod(temp)
}

## Plot the buy-and-hold wealth paths of the two assets
pik <- apply(xik, 2, cumprod)
labNames <- c('stock1','stock2')
title  <- sprintf("%s and %s returns", stock1, stock2)
titlem <- sprintf("Mix of %s and %s", stock1, stock2)
titlef <- sprintf("Fraction of %s in Portfolio", stock1)
titleu <- sprintf("Universal Portfolios with %s and %s", stock1, stock2)
title1 <- sprintf("%s", stock1)
title2 <- sprintf("%s", stock2)

plot(Days, pik[,stock1], col="blue", type="l", ylim=range(pik),
     main=title, ylab="")
lines(Days, pik[,stock2], col="red")
grid()
legend("topright", c(title1,title2), col=c("blue","red"), lty=c(1,1))

# Terminal wealth of each CRP mix (weights above 1 add leverage)
nDays <- dim(xik)[1]
Days <- 1:nDays
lev <- 2.5
alphas <- seq(0, lev, by=0.05)
crps <- alphas
for (i in 1:length(crps)) {
  crps[i] <- crp(xik, c(alphas[i], 1-alphas[i]))[nDays]
}
plot(alphas, crps, col="blue", type="l", ylab="", main=titlem, xlab=titlef)
points(alphas, crps, pch=19, cex=0.5, col="red")
abline(h=mean(crps), col="green")
text(0.5, mean(crps)*1.05, labels="Return from Bayes Portfolio")
grid()

# Universal portfolio: average the wealth paths of all CRP mixes
nDays <- dim(xik)[1]
Days <- 1:nDays
pik <- apply(xik, 2, cumprod)
levl <- 0.0
lev <- 1.0
alphas <- seq(levl, lev, by=0.05)
universal <- xik[,1] * 0
for (i in 1:length(alphas)) {
  universal <- universal + crp(xik, c(alphas[i], 1-alphas[i]))
}
universal <- universal/length(alphas)
plot(Days, pik[,stock1], col="blue", type="l", ylim=range(pik, universal),
     main=titleu, ylab="")
lines(Days, pik[,stock2], col="red")
lines(Days, universal, col="green")
legend("topleft", c(title1,title2,'"universal"'),
       col=c("blue","red","green"), lty=c(1,1,1))
grid()

##############################
#
#  Longest runs
#
##############################

longest.run <- function(tosses) {
  # Computes the length of the longest run of heads or tails
  max.run = 1            # longest run seen so far
  cur.run = 1            # length of the current run
  l = length(tosses)
  for (i in 2:l) {
    if (tosses[i]==tosses[i-1]) cur.run = cur.run + 1
    else cur.run = 1
    if (cur.run > max.run) max.run = cur.run   # keep track of the longest run
  }
  return(max.run)
}

# Example:
my.toss = runif(25) < 0.5   # a sequence of tosses; FALSE means Heads, TRUE means Tails
longest.run(my.toss)        # longest run in the sequence

# Let's run a simulation to estimate the distribution of the longest run:
sims = 10000
runs = numeric(sims)
for (i in 1:sims) {
  my.toss = runif(25) < 0.5        # generate a random sequence of Heads and Tails
  runs[i] = longest.run(my.toss)
}
hist(runs)
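## Summarize the simulated distribution above (uses the runs vector).
## For example, the estimated chance of a run of 5 or more in 25 fair tosses:
mean(runs >= 5)
table(runs)/sims   # estimated distribution of the longest-run length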
#####################################
#  Elections: Bayes and Nate Silver
#####################################
#
#  Election 2012
#

library(LearnBayes)
library(plyr)
library(lattice)

file1 <- "http://www.electoral-vote.com/evp2012/Pres/pres_polls.csv"
election.2012 <- read.csv(file = file1)

# Remove a pollster (the second line keeps all pollsters;
# comment it out to actually drop Rasmussen):
elect2012 <- election.2012[!grepl('Rasmussen', election.2012$Pollster),]
elect2012 <- election.2012

# Aggregate the data: keep each state's most recent polling day
elect2012 <- ddply(elect2012, .(State), subset, Day == max(Day))
elect2012 <- ddply(elect2012, .(State), summarise,
                   R.pct = mean(GOP), O.pct = mean(Dem), EV = mean(EV))

# Run the simulation: Dirichlet posterior for each state's vote shares
prob.Obama <- function(mydata) {
  p <- rdirichlet(1000, 500 * c(mydata$R.pct, mydata$O.pct,
                                100 - mydata$R.pct - mydata$O.pct)/100 + 1)
  mean(p[, 2] > p[, 1])
}
win.probs <- ddply(elect2012, .(State), prob.Obama)
win.probs$Romney <- 1 - win.probs$V1
names(win.probs)[2] <- "Obama"
win.probs$EV <- elect2012$EV
win.probs <- win.probs[order(win.probs$EV), ]
rownames(win.probs) <- win.probs$State

# Plot probabilities by state
barplot(t(as.matrix(win.probs[, 2:3])), legend = TRUE,
        args.legend = list(x = "topright", cex = 0.5),
        names.arg = win.probs$State, horiz = TRUE, cex.names = 0.5, las = 2,
        xlab = "Probability", main = "Probability of Obama or Romney by State")

# Simulate elections from these state-level win probabilities
sim.election <- function(win.probs) {
  winner <- rbinom(51, 1, win.probs$Obama)
  sum(win.probs$EV * winner)
}
sim.EV <- replicate(10000, sim.election(win.probs))
oprob <- sum(sim.EV >= 270)/length(sim.EV)

# Probability of Obama getting 270 EV or more:
oprob
## [1] 0.92

# Lattice graph
pdf("bayes-obama-2012.pdf")
densityplot(sim.EV, plot.points = "rug", xlab = "Electoral Votes for Obama",
            panel = function(x, ...) {
              panel.densityplot(x, ...)
              panel.abline(v = 270)
              panel.text(x = 285, y = 0.01, "270 EV to Win")
              panel.abline(v = 332)
              panel.text(x = 347, y = 0.01, "Actual Obama")
            },
            main = "Electoral College Results Probability")
dev.off()

################################
#
#  Election 2008
#
################################

library(LearnBayes)
data(election.2008)
attach(election.2008)

## Dirichlet simulation of each state's McCain/Obama vote shares
prob.Obama = function(j) {
  p = rdirichlet(5000, 500*c(M.pct[j], O.pct[j], 100-M.pct[j]-O.pct[j])/100 + 1)
  mean(p[,2] > p[,1])
}

## Use sapply to compute Obama's win probability for the 50 states plus DC
Obama.win.probs = sapply(1:51, prob.Obama)

## Simulate the electoral-vote total
sim.election = function() {
  winner = rbinom(51, 1, Obama.win.probs)
  sum(EV*winner)
}
sim.EV = replicate(1000, sim.election())

## Histogram of the simulated election
#pdf("bayes-obama-2008.pdf")
hist(sim.EV, min(sim.EV):max(sim.EV), col="blue", prob=T)
abline(v=365, lwd=3)          # Obama received 365 electoral votes
text(375, 30, "Actual \n Obama \n total")
#dev.off()

## The actual electoral vote did fall within the 90% prediction interval
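## Check the 90% prediction interval mentioned above (a quick summary of the
## 2008 simulation draws in sim.EV):
quantile(sim.EV, c(0.05, 0.95))   # 90% interval for Obama's electoral votes
mean(sim.EV >= 270)               # estimated probability of reaching 270 EV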
%>% fit( x_train, y_train, epochs = 30, batch_size = 128, validation_split = 0.2 ) plot(history) model %>% predict_classes(x_test) ######### simulation example : regression N = 10000 K = 50 d1 = 10 d2 = 10 X = matrix(rnorm(N*K),N,K) Z1 = pmax(X,0) %*% matrix(rpois(K*d1,lambda=1)/d1,K,d1) + rep(rnorm(d1),each=N) Z2 = pmax(Z1,0) %*% matrix(rpois(d1*d2,lambda=1)/d2,d1,d2) + rep(rnorm(d2),each=N) Y = pmax(Z2,0) %*% matrix(rpois(d2*1,lambda=1),d2,1) + 1 train.idx = sample(1:N,floor(N*0.9),replace = FALSE) test.idx = setdiff(1:N,train.idx) x_train = X[train.idx,] y_train = Y[train.idx] x_test = X[test.idx,] y_test = Y[test.idx] model <- keras_model_sequential() model %>% layer_dense(units = 2*d1, activation = 'relu', input_shape = c(K)) %>% layer_dropout(rate = 0.5) %>% layer_dense(units = d2, activation = 'relu') %>% layer_dense(units = 1) model %>% compile( optimizer = optimizer_rmsprop(), loss = 'mse', metrics = c('mae') ) history <- model %>% fit( x_train, y_train, epochs = 100,batch_size = N*0.1 ) model %>% evaluate(x_test, y_test)