# Regression analysis of the "newfood" data set: a level model of sales,
# a kitchen-sink model with all predictors, and a log-log model that is then
# used for prediction and term plots.
#
# NOTE(review): the column names used below (sales, price, adv, locat,
# income, svol) are taken from the model formulas; confirm them against the
# CSV header printed by names(newfood).

# Load data ----
newfood <- read.csv(
  "http://faculty.chicagobooth.edu/nicholas.polson/teaching/41000/newfood.csv",
  header = TRUE
)
names(newfood)

# Columns are referenced through `newfood` (or `data = newfood`) rather than
# attach(), so nothing is placed on the search path and no globals are masked.

# Correlation matrix ----
cor(newfood[, c("sales", "price", "adv", "locat", "income", "svol")])

# Sales on price, adv, locat ----
model1 <- lm(sales ~ price + adv + locat, data = newfood)
summary(model1)

# Kitchen-sink model: all available predictors ----
model <- lm(sales ~ price + adv + locat + income + svol, data = newfood)
summary(model)

# Check diagnostics
plot(model, pch = 20, col = "blue")

# Log-log model ----
# The dummy variables (adv, locat) cannot be log-transformed, so only the
# continuous variables are logged. The logged versions are stored as columns
# of `newfood` so the model and its prediction data share one source.
newfood$log_sales <- log(newfood$sales)
newfood$log_price <- log(newfood$price)
newfood$log_income <- log(newfood$income)
newfood$log_svol <- log(newfood$svol)

# Build model (locat is not included in this specification)
modelnew <- lm(
  log_sales ~ log_price + adv + log_income + log_svol,
  data = newfood
)
summary(modelnew)

# Check diagnostics
plot(modelnew, pch = 20, col = "blue")
plot(cooks.distance(modelnew), pch = 20, col = "maroon")

# Statistical significance
summary(modelnew)

# Prediction ----
# Confidence interval for the mean response at price = 30, adv = 1,
# income = 8, svol = 34 (predictors supplied on the log scale the model uses).
newdata1 <- data.frame(
  log_price = log(30),
  adv = 1,
  log_income = log(8),
  log_svol = log(34)
)
predict(
  modelnew, newdata1,
  se.fit = TRUE, interval = "confidence", level = 0.95
)

# Prediction interval at the median of each logged predictor, with adv = 1.
newdata2 <- data.frame(
  log_price = median(newfood$log_price),
  adv = 1,
  log_income = median(newfood$log_income),
  log_svol = median(newfood$log_svol)
)
predict(modelnew, newdata2, se.fit = TRUE, interval = "prediction")

# Term plots for the log-log model ----
# 2 x 2 grid, filled column by column.
layout(matrix(c(1, 2, 3, 4), 2, 2))
termplot(modelnew, ylabs = "log_sales", terms = "log_price", se = TRUE)
termplot(modelnew, ylabs = "log_sales", terms = "adv", se = TRUE)
termplot(modelnew, ylabs = "log_sales", terms = "log_income", se = TRUE)
termplot(modelnew, ylabs = "log_sales", terms = "log_svol", se = TRUE)

# Restore the single-panel layout so later plots are not drawn into the grid.
layout(1)