## Build a standardizing function from a reference data matrix.
##
## X is an nSamples x nDimensions numeric matrix. The column means and
## column standard deviations of X are captured, and a function is returned
## that applies (newX - mean) / sd column by column to any matrix with the
## same number of columns. This lets test data be scaled with the training
## statistics.
##
## Called with no argument, prints a usage message and returns invisibly.
makeStandardizeF <- function(X) {
  if (missing(X)) {
    cat("Usage: standardize <- makeStandardizeF(X) ## X is nSamples x nDimensions Xs <- standardize(X) X2s <- standardize(X2)\n")
    return(invisible())
  }
  ## X is nSamples x nDimensions
  mu <- colMeans(X)
  ## sd() on a matrix yields a single value computed over all elements in
  ## current versions of R, so the per-column standard deviations must be
  ## computed explicitly.
  sigma <- apply(X, 2, sd)
  function(newX) {
    nr <- nrow(newX)
    nc <- ncol(newX)
    ## Expand the per-column statistics to the shape of newX (one copy of
    ## mu / sigma per row) so the arithmetic is elementwise.
    (newX - matrix(mu, nr, nc, byrow = TRUE)) /
      matrix(sigma, nr, nc, byrow = TRUE)
  }
}

## Convert a vector (or one-column matrix) of class labels into an
## N x K matrix of 0/1 indicator variables, where K is the number of
## distinct labels. Column k is 1 for the samples whose label equals the
## k-th distinct label (in order of first appearance) and 0 otherwise.
makeIndicatorVars <- function(Y) {
  if (!is.matrix(Y)) {
    Y <- matrix(Y)
  }
  classes <- unique(Y)
  N <- nrow(Y)
  K <- length(classes)
  ## Labels are repeated across K columns and the distinct classes across
  ## N rows, so a single comparison yields the full indicator matrix.
  logicalMatrix <- (matrix(Y, N, K) == matrix(classes, N, K, byrow = TRUE))
  mode(logicalMatrix) <- "numeric" ## to convert to numbers 0, 1
  logicalMatrix
}

######################################################################
## Linear least-squares classification of the UCI Parkinsons data set.

data <- read.table(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data",
  header = TRUE, sep = ","
)
data <- as.matrix(data[, -1]) ## remove name and make numeric
### data <- data[sample(nrow(data)),]  ## randomly rearrange data

status <- data[, "status"]
## Remove status column from data. A logical mask is used so that a missing
## column name cannot accidentally drop every column.
data <- data[, colnames(data) != "status", drop = FALSE]

dataHealthy <- data[status == 0, , drop = FALSE]
dataParks <- data[status == 1, , drop = FALSE]
nHealthy <- nrow(dataHealthy)
nParks <- nrow(dataParks)

### Force equal sampling proportion of two classes
trainf <- 0.8
nTrainHealthy <- floor(trainf * nHealthy)
nTrainParks <- floor(trainf * nParks)

## The first trainf fraction of each class is used for training and the
## remainder for testing. Targets are coded 1 (healthy) and 2 (Parkinsons).
Xtrain <- rbind(
  dataHealthy[seq_len(nTrainHealthy), , drop = FALSE],
  dataParks[seq_len(nTrainParks), , drop = FALSE]
)
Ttrain <- matrix(c(rep(1, nTrainHealthy), rep(2, nTrainParks)))
Xtest <- rbind(
  dataHealthy[-seq_len(nTrainHealthy), , drop = FALSE],
  dataParks[-seq_len(nTrainParks), , drop = FALSE]
)
Ttest <- matrix(c(
  rep(1, nHealthy - nTrainHealthy),
  rep(2, nParks - nTrainParks)
))

## Standardize with training statistics only, then prepend a column of 1s
## for the bias weight.
standardize <- makeStandardizeF(Xtrain)
Xtrain1 <- cbind(1, standardize(Xtrain))

## Solve the normal equations (X'X) w = X'T for the weight vector.
w <- solve(crossprod(Xtrain1), crossprod(Xtrain1, Ttrain))

TtrainPredicted <- Xtrain1 %*% w
TtestPredicted <- cbind(1, standardize(Xtest)) %*% w

## A prediction counts as correct when it lies within 0.5 of the true
## class code, i.e. it rounds to the right class.
pCorrectTrain <- sum(abs(TtrainPredicted - Ttrain) < 0.5) /
  length(Ttrain) * 100.0
pCorrectTest <- sum(abs(TtestPredicted - Ttest) < 0.5) /
  length(Ttest) * 100.0
cat("Training data",pCorrectTrain," percent correct\n")
cat("Testing data",pCorrectTest," percent correct\n")

## Plot true versus predicted class codes for both partitions side by side,
## then restore the graphics parameters that were in effect before.
p <- par(mfrow = c(1, 2), bty = "n")
matplot(cbind(Ttrain, TtrainPredicted),
  type = "b", pch = 1, lty = 1,
  xlab = "Sample", ylab = "True and Predicted Class", main = "Train Data"
)
matplot(cbind(Ttest, TtestPredicted),
  type = "b", pch = 1, lty = 1,
  xlab = "Sample", ylab = "True and Predicted Class", main = "Test Data"
)
par(p)