#### Diabetes Example: code to create the pvalues ## read the raw data dm2 <- read.csv("diabetes.csv") dim(dm2) names(dm2) dm2[dm2\$SNP=="rs6577581",] ## our table from the lecture ## Chisquare test tab<-dm2[dm2\$SNP=="rs6577581",3:5] chisq.test(tab) # Another way is tab<-as.table(as.matrix(tab)) summary(tab) # How was the Chisq test statistic calculated? # What are the marginal probabilities colSums(tab)/sum(tab) rowSums(tab)/sum(tab) # What would the table look like if H0 were true? tab_null<-rowSums(tab)%o%colSums(tab)/sum(tab) tab_null # The hypothetical table yields the same marginal probabilities colSums(tab_null)/sum(tab_null) rowSums(tab_null)/sum(tab_null) # Calculate Chisq value<-sum((tab-tab_null)^2/tab_null) value # P-value 1-pchisq(value,2) ## Functions; these are to make your life easier. ## I wrote this one to calculate the pvalue for each SNP table ## Don't worry too much about this code for now... get_pval <- function(snp){ tab <- dm2[dm2\$SNP==snp,3:5] ## get the table of values tab <- as.table(as.matrix(tab)) ## tell R it's a contingency table test <- summary(tab) ## summarize: this includes the chi2 test if(test\$approx.ok){ ## at least 5 expected count in each cell? pval <- test\$p.value ## if so, return the pvalue } else{ pval <- NA } ## otherwise return nothing. return(pval) } ## apply this function to every SNP ## sapply applies a function to every element in a list. pvals <- sapply(levels(dm2\$SNP), get_pval) ## saving `binary' data. This writes the object to disk, ## and you can read it back with 'load' (see dm2_fdr.R) save(pvals, file="dm2_pvals.rda")