"discrim" <- function(da,size,eqP=T,eqV=T,pr=c(0)){ # da: data matrix. The data are arranged according to populations. # size: a vector of sample size of each populations so that # the length of k is the number of populations. # eqP: switch for equal probabilities # eqV: switch for equal covariance matrices. # Assume equal costs. # Assume normality. # nr = nrow(da) nc = ncol(da) g=length(size) # compute the sample mean and covariance matrix of each populations cm=matrix(0,nc,g) dim=g*nc # S: stores the covariance matrices (one on top of the other) S=matrix(0,dim,nc) Sinv=matrix(0,dim,nc) ist=0 for (i in 1:g){ x = da[(ist+1):(ist+size[i]),] if(nc > 1){ cm1 = apply(x,2,mean)} else{ cm1=c(mean(x)) } cm[,i]=cm1 smtx=var(x) jst =(i-1)*nc S[(jst+1):(jst+nc),]=smtx Si=solve(smtx) Sinv[(jst+1):(jst+nc),]=Si print(i) print("mean vector:") print(cm1,digits=3) print("Covariance matrix:") print(smtx,digits=4) # print("Inverse of cov-mtx:") # print(Si,digits=4) ist=ist+size[i] } if(eqP){ pr=rep(1,g)/g } if(eqV){ print("Assume equal covariance matrices and costs") Sp=matrix(0,nc,nc) for (i in 1:g){ jdx=(i-1)*nc smtx=S[(jdx+1):(jdx+nc),] Sp=Sp+(size[i]-1)*smtx } Sp = Sp/(nr-g) print("Sp") print(Sp,digits=4) Spinv=solve(Sp) print("Sp-inv") print(Spinv,digits=4) # compute ai-vector and bi value ai=matrix(0,g,nc) bi=rep(0,g) for (i in 1:g){ cmi = as.matrix(cm[,i],nc,1) ai[i,]= t(cmi)%*%Spinv bi[i]=(-0.5)*ai[i,]%*%cmi+log(pr[i]) } ist = 0 tmc=matrix(0,g,g) # icnt: counts of misclassification icnt = 0 mtab=NULL for(i in 1:g){ cls=rep(0,g) jst=ist+1 jend=ist+size[i] for (j in jst:jend){ dix=bi for (ii in 1:g){ dix[ii]=dix[ii]+sum(ai[ii,]*da[j,]) } # Find the maximum kx=1 dmax=dix[1] for (jj in 2:g){ if(dix[jj]>dmax){ kx=jj dmax=dix[jj] } } cls[kx]=cls[kx]+1 if(abs(kx-i)>0){ icnt=icnt+1 # compute the posterior probabilities xo=da[j,] tmp=as.matrix(xo)%*%Spinv tmp1=sum(xo*tmp) tmp1 = (tmp1 + det(Sp))/2 pro = exp(dix-tmp1) pro=pro/sum(pro) case=c(j,i,kx,pro) mtab=rbind(mtab,case) } } tmc[i,]=cls ist=ist+size[i] } } # Unequal covariance matrices else { print("Assume Un-equal covariance matrices, but equal costs") # The next loop computes the dix components that do not depend on the obs. bi=log(pr) for (i in 1:g){ jst=(i-1)*nc Si=S[(jst+1):(jst+nc),] bi[i]=bi[i]-0.5*log(det(Si)) } ist = 0 tmc=matrix(0,g,g) # icnt: counts of misclassification icnt = 0 # mtab: stores the misclassification cases mtab=NULL for(i in 1:g){ cls=rep(0,g) # identify the cases for population i. jst=ist+1 jend=ist+size[i] for (j in jst:jend){ # Compute the dix measure xo=da[j,] dix=bi for (ii in 1:g){ cmi = cm[,ii] kst=(ii-1)*nc Si=Sinv[(kst+1):(kst+nc),] dev=xo-cmi tmp=as.matrix(dev)%*%Si dix[ii]=dix[ii]-0.5*sum(dev*tmp) } # Find the maximum kx=1 dmax=dix[1] for (jj in 2:g){ if(dix[jj]>dmax){ kx=jj dmax=dix[jj] } } cls[kx]=cls[kx]+1 if(abs(kx-i)>0){ icnt=icnt+1 # compute the posterior probabilities pro = exp(dix) pro=pro/sum(pro) case=c(j,i,kx,pro) mtab=rbind(mtab,case) } } tmc[i,]=cls ist=ist+size[i] } } # Output if(nrow(mtab)>0){ print("Misclassification cases") print("Case From To Post-prob") print(mtab,digits=3) } print("Classification table:") print(tmc) print("Error rate:") aper=icnt/nr print(aper) }