######################################################################
### k-NN
######################################################################

#' Build a k-nearest-neighbour model from training data.
#'
#' The training inputs are standardized with the function returned by
#' `makeStandardizeF(Xtrain)` (defined elsewhere in the project); the same
#' function is stored so that test inputs can be transformed identically.
#'
#' @param Xtrain Training inputs, one sample per row.
#' @param Ttrain Training targets (class labels), one per row of `Xtrain`.
#' @return A list with elements `Xtrain` (standardized inputs), `Ttrain`,
#'   `classes` (`unique(Ttrain)`) and `standardizeF`.
makeKNN <- function(Xtrain, Ttrain) {
  standardizeF <- makeStandardizeF(Xtrain)
  list(
    Xtrain = standardizeF(Xtrain),
    Ttrain = Ttrain,
    classes = unique(Ttrain),
    standardizeF = standardizeF
  )
}

#' Classify test samples with a model built by `makeKNN()`.
#'
#' For every test row the training rows are ranked by squared Euclidean
#' distance (after standardization); the predicted class for a given `k` is
#' the most common target among the `k` closest training rows.
#'
#' @param knn Model list as returned by `makeKNN()`.
#' @param Xtest Test inputs, one sample per row.
#' @param nNeighbors One or more neighbourhood sizes `k`. Each must lie
#'   between 1 and the number of training rows.
#' @return A matrix with one row per test sample and one column per entry of
#'   `nNeighbors` (or `NULL` when `nNeighbors` is empty).
useKNN <- function(knn, Xtest, nNeighbors = 1) {
  Xtest <- knn$standardizeF(Xtest)
  nTrain <- nrow(knn$Xtrain)
  nFeatures <- ncol(knn$Xtrain)

  # Fail early with a clear message instead of an obscure subscript error
  # (k too large) or a silently meaningless result (k < 1).
  if (anyNA(nNeighbors) || any(nNeighbors < 1) || any(nNeighbors > nTrain)) {
    stop("nNeighbors must be between 1 and the number of training samples (",
         nTrain, ")", call. = FALSE)
  }

  ranked <- apply(Xtest, 1, function(xtest) {
    distancesSquared <- rowSums(
      (knn$Xtrain - matrix(xtest, nTrain, nFeatures, byrow = TRUE))^2
    )
    order(distancesSquared)
  })
  # apply() collapses to a plain vector when there is only one training row;
  # force the (training rank x test sample) layout before transposing so that
  # each row of `indices` always belongs to one test sample.
  indices <- t(matrix(ranked, nrow = nTrain))

  perK <- lapply(nNeighbors, function(k) {
    nearest <- indices[, seq_len(k), drop = FALSE]
    neighbourTargets <- matrix(knn$Ttrain[c(nearest)], nrow(indices), k)
    mostCommon(neighbourTargets, knn$classes)
  })
  do.call(cbind, perK)
}

#' Most frequent value, overall or per row.
#'
#' If `data` has no `dim` attribute it is treated as a single collection and
#' one value is returned; otherwise the most common value is determined for
#' each row. Ties are resolved in favour of the candidate that appears first
#' in `values`. `NA` entries in `data` are ignored while counting.
#'
#' @param data Vector, or matrix with one collection per row.
#' @param values Candidate values to count; defaults to the distinct values
#'   found in `data`.
#' @return A one-column matrix holding the winning value(s); a row for which
#'   no candidate can be chosen yields `NA`. When called without `data`, a
#'   usage message is printed and `NULL` is returned invisibly.
mostCommon <- function(data, values = unique(c(data))) {
  if (missing(data)) {
    cat("Usage: mostCommon(data, ) where most common is determined for each row\n")
    # Nothing to compute: return instead of falling through and failing on
    # the missing argument.
    return(invisible(NULL))
  }

  # Number of occurrences of each candidate value in `x`.
  countMatches <- function(x) {
    vapply(values, function(v) sum(v == x, na.rm = TRUE), numeric(1),
           USE.NAMES = FALSE)
  }

  if (is.null(dim(data))) {
    results <- values[which.max(countMatches(data))]
  } else {
    ## multiple rows in data
    winners <- vapply(seq_len(nrow(data)), function(rowi) {
      best <- which.max(countMatches(data[rowi, ]))
      # Keep one entry per row even when no candidate can be selected.
      if (length(best) == 0L) NA_integer_ else best
    }, integer(1))
    results <- values[winners]
  }
  matrix(results, ncol = 1)
}