doParts <- c("part1a","part1b","part2a","part2b","part2c","part2d", "part3") ###################################################################### ### Assignment 3 Solution by Chuck Anderson ### CS545 Fall 2009 (www.cs.colostate.edu/~anderson/cs545) ###################################################################### ###################################################################### ### General functions makeStandardizeF <- function(X) { if (missing(X)) { cat("Usage: standardize <- makeStandardizeF(X) ## X is nSamples x nDimensions Xs <- standardize(X) X2s <- standardize(X2)\n") return(invisible()) } ## X is nSamples x nDimensions mu <- colMeans(X) sigma <- sd(X) ##sd should be named colSds function(newX) { nr <- nrow(newX) nc <- ncol(newX) (newX - matrix(mu,nr,nc,byrow=TRUE)) / matrix(sigma,nr,nc,byrow=TRUE) } } makeIndicatorVars <- function(Y) { if (!is.matrix(Y)) Y <- matrix(Y) classes <- unique(Y) N <- nrow(Y) K <- length(classes) logicalMatrix <- (matrix(Y,N,K) == matrix(classes,N,K,byrow=TRUE)) mode(logicalMatrix) <- "numeric" ## to convert to numbers 0, 1 logicalMatrix } mydnorm <- function(xs,mu,sigma) { xs <- as.matrix(xs) N <- nrow(xs) D <- ncol(xs) if (D == 1) detSigma <- sigma else detSigma <- det(sigma) normConstant <- 1/((2*pi)^(D/2) * sqrt(detSigma)) invSigma <- solve(sigma) xsZeroMean <- xs - matrix(rep(mu,N),N,D,byrow=TRUE) normConstant * exp(-1/2 * rowSums( xsZeroMean %*% invSigma * xsZeroMean)) } percentCorrect <- function(trueclasses, guesses) { sum(trueclasses==guesses) / length(trueclasses) * 100 } confusionMatrix <- function(trueclasses, guesses) { uniqs <- unique(trueclasses) cf = NULL for (c in uniqs) { mask <- trueclasses == c onerow = NULL; for (cguess in uniqs) onerow <- c(onerow, sum(guesses[mask] == cguess)) cf <- rbind(cf, onerow) } rownames(cf) <- paste("True Class",uniqs) colnames(cf) <- paste("Predicted Class",uniqs) cf / matrix(rep(rowSums(cf),ncol(cf)),ncol=ncol(cf)) } ###################################################################### ### Functions for either QDA or LDA makeGaussianDA <- function(X,T,makeDiscF=makeQDADiscF,avgCov=FALSE) { T <- matrix(T) classes <- sort(unique(T)) ## Convert T to 1,... so T can be 10,22, etc., any integers for class labels Tseq <- apply(T,1, function(c) {which(c == classes)}) # T converted to 1,..,K ### Standardize inputs standardizeF <- makeStandardizeF(X) X <- standardizeF(X) ### Create list of discriminant functions, discsFs discFs <- NULL K <- length(classes) N <- nrow(X) priors <- NULL mus <- NULL sigmas <- NULL for (k in 1:K) { rowsThisClass <- Tseq == k priors <- c(priors, list(sum(rowsThisClass) / N)) mus <- c(mus, list(colMeans(X[rowsThisClass,,drop=FALSE]))) sigmas <- c(sigmas, list(cov(X[rowsThisClass,,drop=FALSE]))) } if (avgCov) { sumcov <- sigmas[[1]] for (k in 2:K) sumcov <- sumcov + sigmas[[k]] avgcov <- sumcov / K } for (k in 1:K) { if (avgCov) sigma <- avgcov else sigma <- sigmas[[k]] discFs <- c(discFs, makeDiscF(mus[[k]],sigma,priors[[k]])) ## either QDA or LDA } list(classes = classes, discFs = discFs, standardizeF = standardizeF) } useGaussianDA <- function(model,X,probs=FALSE) { ## Standardize inputs X <- model$standardizeF(X) ## Collect discriminant function values for each class discs <- NULL for (k in 1:length(model$discFs)) discs <- cbind(discs, model$discFs[[k]](X,probs)) ## If probs wanted, exponentiate discriminant function values if (probs) { px <- rowSums(discs) K <- length(model$discFs) discs / matrix(rep(px,K),ncol=K) } else model$classes[apply(discs,1,which.max)] } ### For LDA makeLDADiscF <- function(mu,sigma,prior) { sigmaInv <- solve(sigma) sigmaInvmu <- sigmaInv %*% matrix(mu) constantPart <- -0.5 * matrix(mu,nrow=1) %*% sigmaInvmu + log(prior) function(X,probs=FALSE) { ## prob=TRUE: return as probability, not disc function value df <- X %*% sigmaInvmu + matrix(rep(constantPart,nrow(X))) if (probs) exp(-0.5 * rowSums(X %*% sigmaInv * X) + df) else df } } makeLDA <- function(X,T) { if (missing(X) || missing(T)) { cat("Usage: lda <- makeLDA(X,T) # X is nSamples x nComponents, T is nSamples x 1 predictions <- useLDA(lda,X) ### lda is list with classes, list of LDA discriminant functions, standardize\n") return(invisible(NULL)) } makeGaussianDA(X,T,makeLDADiscF,avgCov=TRUE) } useLDA <- function(lda,X,probs=FALSE) { if (missing(lda) || missing(X)) { cat("Usage: predictedClasses <- useLDA(qda,X) # lda is from makeLDA(), X is nSamples x nComponents predictionsProbs <- useLDA(qda,X,prob=TRUE)\n") return(invisible(NULL)) } useGaussianDA(lda,X,probs) } ### For QDA makeQDADiscF <- function(mu,sigma,prior) { ##cat("makeQDADiscF mu is ", mu, "\n") mu <- mu ## seems to be necessary to make static scoping work sigmaInv <- solve(sigma) function(X,probs=FALSE) { ## prob=TRUE: return as probability, not disc function value ##cat("in QDA discf mu is ", mu, "\n") diff <- X - matrix(mu,nrow(X),ncol(X),byrow=TRUE) df <- -0.5 * log(det(sigma)) - 0.5 * rowSums(diff %*% sigmaInv * diff) + log(prior) if (probs) exp(df) else df } } makeQDA <- function(X,T) { if (missing(X) || missing(T)) { cat("Usage: qda <- makeQDA(X,T) # X is nSamples x nComponents, T is nSamples x 1 predictions <- useQDA(qda,X) ### qda is list with classes, list of QDA discriminant functions, standardize\n") return(invisible(NULL)) } makeGaussianDA(X,T,makeQDADiscF) } useQDA <- function(qda,X,probs=FALSE) { if (missing(qda) || missing(X)) { cat("Usage: predictedClasses <- useQDA(qda,X) # qda is from makeQDA(), X is nSamples x nComponents predictionsProbs <- useQDA(qda,X,prob=TRUE)\n") return(invisible(NULL)) } useGaussianDA(qda,X,probs) } ###################################################################### ###################################################################### graphics.off() ## to delete all graphics windows ###################################################################### ### part 1a LDA X <- matrix(c(rnorm(10,1,0.1),rnorm(10,2,0.1),rnorm(10,3,0.1))) T <- matrix(c(rep(1,10),rep(2,10),rep(3,10))) Xtest <- matrix(seq(0,4,len=100)) if ("part1a" %in% doParts) { x11() par.orig <- par(mfrow=c(2,3),mar=c(5,5,1,1)) lda <- makeLDA(X,T) predc <- useLDA(lda,Xtest) ### 1. Training data plot(X,T,type="p",col=T, xlab="x", ylab="Class") ### 2. p(x|C=k) generative distributions ys <- NULL for (k in 1:3) { rowsInClass <- T==k ys <- cbind(ys, mydnorm(Xtest, mean(X[rowsInClass]), var(X[rowsInClass]))) } matplot(Xtest, ys, lwd=2,lty=1, type="l", xlab="x", ylab="P( x | C=k )") title("Linear Discriminant Analysis") ### 3. p(x) plot(Xtest,rowSums(ys) * 1/3, type="l", lwd=2, xlab="x", ylab="P( x )") ### 4. p(C=k|x) posteriors <- useLDA(lda,Xtest,probs=TRUE) matplot(Xtest,posteriors,type="l", lwd=2,lty=1, xlab="x", ylab="p( C=k | x )") ### 5. discriminant functions xs <- lda$standardize(Xtest) discs <- NULL for (k in 1:3) discs <- cbind(discs, lda$discFs[[k]](xs)) matplot(Xtest,discs,type="l", lty=1,lwd=2, xlab="x", ylab=expression(delta[k])) ### 6. classes plot(Xtest,predc,type="p",col=predc,xlab="x",ylab="Predicted Class") par(par.orig) dev.copy2pdf(file="part1a.pdf") } ###################################################################### ### part 1b QDA if ("part1b" %in% doParts) { x11() par.orig <- par(mfrow=c(2,3),mar=c(5,5,1,1)) qda <- makeQDA(X,T) predc <- useQDA(qda,Xtest) ### 1. Training data plot(X,T,type="p",col=T, xlab="x", ylab="Class") ### 2. p(x|C=k) generative distributions ys <- NULL for (k in 1:3) { rowsInClass <- T==k ys <- cbind(ys, mydnorm(Xtest, mean(X[rowsInClass]), var(X[rowsInClass]))) } matplot(Xtest, ys, lwd=2,lty=1, type="l", xlab="x", ylab="P( x | C=k )") title("Quadratic Discriminant Analysis") ### 3. p(x) plot(Xtest,rowSums(ys) * 1/3, type="l", xlab="x", ylab="P( x )") ### 4. p(C=k|x) posteriors <- useQDA(qda,Xtest,probs=TRUE) matplot(Xtest,posteriors,type="l", lty=1,lwd=2, xlab="x", ylab="p( C=k | x )") ### 5. discriminant functions xs <- qda$standardize(Xtest) discs <- NULL for (k in 1:3) discs <- cbind(discs, qda$discFs[[k]](xs)) matplot(Xtest,discs,type="l", lty=1,lwd=2, xlab="x", ylab=expression(delta[k])) ### 6. classes plot(Xtest,predc,type="p",col=predc,xlab="x",ylab="Predicted Class") par(par.orig) dev.copy2pdf(file="part1b.pdf") } ###################################################################### ### Part 2a LDA easy 2d solution ### train data means <- list(matrix(c(2,2, 3,2), 2,2,byrow=TRUE), matrix(c(3,6, 2,5), 2,2,byrow=TRUE), matrix(c(8,2, 7,2), 2,2,byrow=TRUE)) std <- 0.5 X <- NULL for (class in 1:3) { mus <- means[[class]] X <- rbind(X, cbind(rnorm(10,mus[1,1],std), rnorm(10,mus[1,2],std))) X <- rbind(X, cbind(rnorm(10,mus[2,1],std), rnorm(10,mus[2,2],std))) } T <- matrix(c(rep(1,20),rep(2,20),rep(3,20))) ### Test data xs <- seq(0,10,len=40) ys <- xs Xtest <- as.matrix(expand.grid(xs,ys)) ###################################################################### ### Part 2a LDA easy 2d solution if ("part2a" %in% doParts) { ### Make model lda <- makeLDA(X,T) predc <- useLDA(lda,Xtest) ### Plots x11() par.orig <- par(mfrow=c(2,3),mar=c(5,5,1,1)) plot(X[,1],X[,2],col=T+1,pch=paste(T),xlab=expression(x[1]),ylab=expression(x[2])) ### 2. p(x|C=k) generative distributions g <- NULL for (k in 1:3) { rowsInClass <- T==k g <- cbind(g, mydnorm(Xtest, colMeans(X[rowsInClass,]), cov(X[rowsInClass,]))) } N <- length(xs) surface <- matrix(apply(g,1,max),N,N) colorsmax <- 1+matrix(apply(g,1,which.max),N,N)[1:(N-1),1:(N-1)] persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x | C=k )", col=colorsmax,theta=-30,phi=40,border=NA,shade=0.3) title("Linear Discriminant Analysis") ### 3. p(x) surface <- matrix(rowSums(g) * 1/3, N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x )", col="gray",theta=-30,phi=40,border=NA,shade=0.3) ### 4. p(C=k|x) colors <- 1+matrix(predc,N,N)[1:(N-1),1:(N-1)] posteriors <- useLDA(lda,Xtest,probs=TRUE) surface <- matrix(apply(posteriors,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( C=k | x )", col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 5. discriminant functions xtests <- lda$standardize(Xtest) discs <- NULL for (k in 1:3) discs <- cbind(discs, lda$discFs[[k]](xtests)) surface <- matrix(apply(discs,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab=expression(delta[k]), col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 6. classes persp(xs,ys, matrix(predc,N,N),xlab=expression(x[1]),ylab=expression(x[2]),zlab="Predicted Class", col=colors,theta=-30,phi=40,border=NA,shade=0.3) par(par.orig) dev.copy2pdf(file="part2a.pdf") } ###################################################################### ### Part 2b QDA easy 2d solution if ("part2b" %in% doParts) { ### Make model qda <- makeQDA(X,T) predc <- useQDA(qda,Xtest) ### Plots x11() par.orig <- par(mfrow=c(2,3),mar=c(5,5,1,1)) plot(X[,1],X[,2],col=T+1,pch=paste(T),xlab=expression(x[1]),ylab=expression(x[2])) ### 2. p(x|C=k) generative distributions g <- NULL for (k in 1:3) { rowsInClass <- T==k g <- cbind(g, mydnorm(Xtest, colMeans(X[rowsInClass,]), cov(X[rowsInClass,]))) } N <- length(xs) surface <- matrix(apply(g,1,max),N,N) colorsmax <- 1+matrix(apply(g,1,which.max),N,N)[1:(N-1),1:(N-1)] persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x | C=k )", col=colorsmax,theta=-30,phi=40,border=NA,shade=0.3) title("Quadratic Discriminant Analysis") ### 3. p(x) surface <- matrix(rowSums(g) * 1/3, N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x )", col="gray",theta=-30,phi=40,border=NA,shade=0.3) ### 4. p(C=k|x) colors <- 1+matrix(predc,N,N)[1:(N-1),1:(N-1)] posteriors <- useQDA(qda,Xtest,probs=TRUE) surface <- matrix(apply(posteriors,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( C=k | x )", col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 5. discriminant functions xtests <- lda$standardize(Xtest) discs <- NULL for (k in 1:3) discs <- cbind(discs, qda$discFs[[k]](xtests)) surface <- matrix(apply(discs,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab=expression(delta[k]), col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 6. classes persp(xs,ys, matrix(predc,N,N),xlab=expression(x[1]),ylab=expression(x[2]),zlab="Predicted Class", col=colors,theta=-30,phi=40,border=NA,shade=0.3) par(par.orig) dev.copy2pdf(file="part2b.pdf") } ###################################################################### ### Part 2 hard 2d solution ### train data means <- list(matrix(c(2,2, 5,5), 2,2,byrow=TRUE), matrix(c(5,3, 7,7), 2,2,byrow=TRUE), matrix(c(4,4, 3,7), 2,2,byrow=TRUE)) std <- 0.5 X <- NULL for (class in 1:3) { mus <- means[[class]] X <- rbind(X, cbind(rnorm(10,mus[1,1],std), rnorm(10,mus[1,2],std))) X <- rbind(X, cbind(rnorm(10,mus[2,1],std), rnorm(10,mus[2,2],std))) } T <- matrix(c(rep(1,20),rep(2,20),rep(3,20))) ### Test data xs <- seq(0,10,len=40) ys <- xs Xtest <- as.matrix(expand.grid(xs,ys)) ###################################################################### ### Part 2c LDA hard 2d solution if ("part2c" %in% doParts) { ### Make model lda <- makeLDA(X,T) predc <- useLDA(lda,Xtest) ### Plots x11() par.orig <- par(mfrow=c(2,3),mar=c(5,5,1,1)) plot(X[,1],X[,2],col=T+1,pch=paste(T),xlab=expression(x[1]),ylab=expression(x[2])) ### 2. p(x|C=k) generative distributions g <- NULL for (k in 1:3) { rowsInClass <- T==k g <- cbind(g, mydnorm(Xtest, colMeans(X[rowsInClass,]), cov(X[rowsInClass,]))) } N <- length(xs) surface <- matrix(apply(g,1,max),N,N) colorsmax <- 1+matrix(apply(g,1,which.max),N,N)[1:(N-1),1:(N-1)] persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x | C=k )", col=colorsmax,theta=-30,phi=40,border=NA,shade=0.3) title("Linear Discriminant Analysis") ### 3. p(x) surface <- matrix(rowSums(g) * 1/3, N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x )", col="gray",theta=-30,phi=40,border=NA,shade=0.3) ### 4. p(C=k|x) colors <- 1+matrix(predc,N,N)[1:(N-1),1:(N-1)] posteriors <- useLDA(lda,Xtest,probs=TRUE) surface <- matrix(apply(posteriors,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( C=k | x )", col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 5. discriminant functions xtests <- lda$standardize(Xtest) discs <- NULL for (k in 1:3) discs <- cbind(discs, lda$discFs[[k]](xtests)) surface <- matrix(apply(discs,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab=expression(delta[k]), col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 6. classes persp(xs,ys, matrix(predc,N,N),xlab=expression(x[1]),ylab=expression(x[2]),zlab="Predicted Class", col=colors,theta=-30,phi=40,border=NA,shade=0.3) par(par.orig) dev.copy2pdf(file="part2a.pdf") } ###################################################################### ### Part 2d QDA hard 2d solution if ("part2d" %in% doParts) { ### Make model qda <- makeQDA(X,T) predc <- useQDA(qda,Xtest) ### Plots x11() par.orig <- par(mfrow=c(2,3),mar=c(5,5,1,1)) plot(X[,1],X[,2],col=T+1,pch=paste(T),xlab=expression(x[1]),ylab=expression(x[2])) ### 2. p(x|C=k) generative distributions g <- NULL for (k in 1:3) { rowsInClass <- T==k g <- cbind(g, mydnorm(Xtest, colMeans(X[rowsInClass,]), cov(X[rowsInClass,]))) } N <- length(xs) surface <- matrix(apply(g,1,max),N,N) colorsmax <- 1+matrix(apply(g,1,which.max),N,N)[1:(N-1),1:(N-1)] persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x | C=k )", col=colorsmax,theta=-30,phi=40,border=NA,shade=0.3) title("Quadratic Discriminant Analysis") ### 3. p(x) surface <- matrix(rowSums(g) * 1/3, N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( x )", col="gray",theta=-30,phi=40,border=NA,shade=0.3) ### 4. p(C=k|x) colors <- 1+matrix(predc,N,N)[1:(N-1),1:(N-1)] posteriors <- useQDA(qda,Xtest,probs=TRUE) surface <- matrix(apply(posteriors,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab="P( C=k | x )", col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 5. discriminant functions xtests <- lda$standardize(Xtest) discs <- NULL for (k in 1:3) discs <- cbind(discs, qda$discFs[[k]](xtests)) surface <- matrix(apply(discs,1,max),N,N) persp(xs,ys,surface,xlab=expression(x[1]),ylab=expression(x[2]),zlab=expression(delta[k]), col=colors,theta=-30,phi=40,border=NA,shade=0.3) ### 6. classes persp(xs,ys, matrix(predc,N,N),xlab=expression(x[1]),ylab=expression(x[2]),zlab="Predicted Class", col=colors,theta=-30,phi=40,border=NA,shade=0.3) par(par.orig) dev.copy2pdf(file="part2b.pdf") } ###################################################################### ### real data set if ("part3" %in% doParts) { data <- read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data",header=TRUE,sep=",") data <- as.matrix(data[,-1]) ## remove name and make numeric data <- data[sample(nrow(data)),] ## randomly rearrange data status <- data[,"status"] data <- data[, -which(colnames(data)=="status") ] ## remove status column from data dataHealthy <- data[status==0,] dataParks <- data[status==1,] nHealthy <- nrow(dataHealthy) nParks <- nrow(dataParks) ### Force equal sampling proportion of two classes trainf <- 0.8 Xtrain <- rbind(dataHealthy[1:floor(trainf*nHealthy),], dataParks[1:floor(trainf*nParks),]) Ttrain <- matrix(c(rep(1,floor(trainf*nHealthy)), rep(2,floor(trainf*nParks)))) Xtest <- rbind(dataHealthy[-(1:floor(trainf*nHealthy)),], dataParks[-(1:floor(trainf*nParks)),]) Ttest <- matrix(c(rep(1,nHealthy-floor(trainf*nHealthy)), rep(2,nParks-floor(trainf*nParks)))) lda <- makeLDA(Xtrain,Ttrain) pcLDATrain <- percentCorrect(Ttrain,useLDA(lda,Xtrain)) pcLDATest <- percentCorrect(Ttest,useLDA(lda,Xtest)) qda <- makeQDA(Xtrain,Ttrain) pcQDATrain <- percentCorrect(Ttrain,useQDA(qda,Xtrain)) pcQDATest <- percentCorrect(Ttest,useQDA(qda,Xtest)) cat("LDA Percent Correct: Train ",pcLDATrain," Test ", pcLDATest,"\n") cat(" Confusion matrix for training data:\n") print(confusionMatrix(Ttrain,useLDA(lda,Xtrain))) cat(" Confusion matrix for testing data:\n") print(confusionMatrix(Ttest,useLDA(lda,Xtest))) cat("\n") cat("QDA Percent Correct: Train ",pcQDATrain," Test ", pcQDATest,"\n") cat(" Confusion matrix for training data:\n") print(confusionMatrix(Ttrain,useQDA(qda,Xtrain))) cat(" Confusion matrix for testing data:\n") print(confusionMatrix(Ttest,useQDA(qda,Xtest))) cat("\nSummary of Percent Correct\n") result <- matrix(c(pcLDATrain,pcLDATest, pcQDATrain,pcQDATest),2,2,byrow=TRUE) rownames(result) <- c("LDA","QDA") colnames(result) <- c("Train","Test") print(result) }