{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Nearest neighbor classification" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "//anaconda/lib/python3.5/site-packages/sklearn/utils/fixes.py:313: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n", " _nan_object_mask = _nan_object_array != _nan_object_array\n" ] } ], "source": [ "%matplotlib inline\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "from sklearn import datasets\n", "from sklearn.datasets import load_svmlight_file\n", "from sklearn.decomposition import PCA\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.model_selection import cross_validate\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.svm import LinearSVC,SVC" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We'll use a [leukemia](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html) gene expression data set in this notebook. I combined the train/test partitions of this dataset into a single file."
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(72, 7129)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# load_svmlight_file returns the feature matrix in sparse (CSR) form.\n", "X, y = load_svmlight_file(\"data/leu_all.data\")\n", "X.shape" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.83333333333333326" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Baseline: 5-nearest-neighbor classifier with the L2 distance (p=2),\n", "# scored by shuffled, stratified 5-fold cross-validation.\n", "cv_generator = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)\n", "n_neighbors = 5\n", "classifier = KNeighborsClassifier(n_neighbors, p=2, weights='uniform')\n", "cv_results = cross_validate(classifier, X, y, cv=cv_generator, scoring='accuracy', return_train_score=False)\n", "np.mean(cv_results['test_score'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "Text(0.5,0,'number of neighbors')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "text/plain": [ "Text(0,0.5,'accuracy')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X+YlXWd//Hni4Fx8gcDBuzXdUCm\nzR9gguhgZqtumUnkF5Uxgq1taU1dUttKavVav+Xa1rbJqnVplGaR5QWhINHuFPkrq5WMoUHUGTUU\ng1E2RoVRMBoY3t8/7nvwMMxwDjA3Z87M63Fd55r7x+e+7/c5DOc9n8/nvj8fRQRmZmZ7M6DYAZiZ\nWe/nZGFmZnk5WZiZWV5OFmZmlpeThZmZ5eVkYWZmeTlZmJlZXk4WZmaWl5OFmZnlNbDYAfSUYcOG\nxejRo4sdhplZSVm5cuXLETE8X7k+kyxGjx5NfX19scMwMyspkv5QSDk3Q5mZWV5OFmZmlpeThZmZ\n5dVn+izMrLRs376d5uZmtm3bVuxQ+oWKigqqqqoYNGjQfh3vZGFmRdHc3MwRRxzB6NGjkVTscPq0\niOCVV16hubmZ6urq/TpHv08Wq1fD4sWwbh2MGgVTp8K4ccWOyqzv27ZtmxPFQSKJt771rbS0tOz3\nOfp1n8Xq1TBnDmzaBFVVyc85c5LtZpY9J4qD50A/636dLBYvhqFDk9eAAW8uL15c7MjMzHqXfp0s\n1q2Dysrdt1VWJtvNrO87/PDD99j2y1/+klNOOYWBAwdy7733FnSe66+/njlz5gDwhS98gQceeKDb\nskuWLKGxsXH/Ai6ift1nMWpU0vQ0dOib21pbk+1m1rscrP7FUaNGMW/evF1f/vvqhhtu2Ov+JUuW\ncP755zN27Nj9On+x9OuaxdSpSbLYtAl27nxzeerUYkdmZrkOZv/i6NGjGTduHAMG7P3r8ctf/jLH\nH38873vf+3jmmWd2bZ85c+auGsk111zD2LFjGTduHLNnz+bRRx9l6dKlfO5zn+Pkk0/mueee4447\n7mDixImMHz+e2tpa3njjjV3n+dSnPsUZZ5zB2972tt1qOV/72tc46aSTGD9+PNdccw0Azz33HJMm\nTeLUU0/lzDPP5Omnn+7Rz6Vf1yzGjYPZs3f/a+WSS3w3lFlvk9u/CG/+XLy4OP9fV65cyYIFC2ho\naGDHjh2ccsopnHrqqbuVefXVV7nvvvt4+umnkcTmzZsZMmQIU6ZM4fzzz+fiiy8GYMiQIVx66aUA\nXHfdddx5551cddVVAGzYsIFf//rXPP3000yZMoWLL76Yn/70pyxZsoTHHnuMQw89lFdffRWAyy67\njG9961sce+yxPPbYY3zyk5/koYce6rH33K+TBSS/aE4OZr3bunVJjSJXMfsXf/WrX3HRRRdx6KGH\nAjBlypQ9ygwePJiKigo+8YlP8MEPfpDzzz+/y3M9+eSTXHfddWzevJktW7Zw3nnn7dp34YUXMmDA\nAMaOHcsf//hHAB544AE+/vGP77r2kUceyZYtW3j00Uf50Ic+tOvYP//5zz32fsHJwsxKQG/sX8x3\nK+rAgQP57W9/y4MPPsiCBQu49dZbu/xLf+bMmSxZsoTx48czb948fvGLX+zad8ghh+xajohdPztf\ne+fOnQwZMoRVq1YdwDvau37dZ2FmpaG39S+eddZZ3HffffzpT3/i9ddf5yc/+ckeZbZs2UJrayuT\nJ0/mlltu2fVFfsQRR/D666/vKvf6669z1FFHsX37du6+++68137/+9/Pd7/73V19G6+++iqDBw+m\nurqae+65B0gSyuOPP94Tb3UXJwsz6/U6+heHDoXm5uTn7NkH3oT8xhtvUFVVtet10003sWLFCqqq\nqrjnnnu4/PLLOfHEE/c47pRTTuHDH/4wJ598MrW1tZx55pl7l
Hn99dc5//zzGTduHGeffTY333wz\nANOnT+fGG29kwoQJPPfcc3zpS1/ine98J+eeey4nnHBC3pgnTZrElClTqKmp4eSTT95119bdd9/N\nnXfeyfjx4znxxBP58Y9/fGAfTifqqNpkQdIk4OtAGfCdiPhqp/2jgO8DQ9Iy10REnaRBwHeAU0ia\nyu6KiH/f27VqamrCkx+ZlY6mpibGjBlT7DD6la4+c0krI6Im37GZ1SwklQG3AR8AxgIzJHW+sfg6\nYGFETACmA99Mt38IOCQiTgJOBS6XNDqrWM3MbO+ybIY6DVgTEc9HRBuwALigU5kABqfLlcBLOdsP\nkzQQeAvQBryWYaxmZrYXWSaLo4H1OevN6bZc1wMfldQM1AFXpdvvBbYCG4B1wJyIeLXzBSRdJqle\nUv2BjKZoZmZ7l2Wy6Oq+ss4dJDOAeRFRBUwGfiBpAEmtpB34S6AauFrS2/Y4WcTtEVETETXDhw/v\n2ejNzGyXLJNFMzAyZ72KN5uZOlwCLASIiOVABTAM+FvgZxGxPSI2Av8D5O2A2V8bt25kzqNz2Lh1\nY1aXMDMraVkmixXAsZKqJZWTdGAv7VRmHXAOgKQxJMmiJd3+XiUOA04Henagk1RbextzV8xlxYsr\nmLtiLm3tbVlcxsyspGWWLCJiB3AlsAxoIrnr6SlJN0jqeDb+auBSSY8D84GZkdzLextwOPAkSdL5\nXkRkMiXRoqZFrN28ljHDx7B281oWNS3K4jJm1gN6uhWgqyHKb7rppl2D/51zzjn84Q9/yHseD1F+\ngCKijqTjOnfbF3KWG4F3d3HcFpLbZzPVsKGBumfrGD1kNAAjB4+k7tk6TnjrCUw4akLWl+8XNm7d\nyF2P38XHxn+MEYeNKHY4VsI6WgEaWxrZ2raVa8+8lvKy8h6/zoQJE6ivr+fQQw9l7ty5fP7zn+dH\nP/pRwcd7iPI+aGHjQiorKikbUAZA2YAyKisqWdi4sMiR9Q1u4rOedLBaAd7znvfsGqTv9NNPp7m5\nuctyHqK8H5k2dho3Lb+JoRVDKRtQRvvOdlq3tXLJhEuKHVqf0NV/7hnvmFHssKwEFasV4M477+QD\nH/jAHts9RHk/M+GoCUw+bjLL1ixj9JDRrH9tPZOPm+wmqB7gJj7rSXtrBcjq9+mHP/wh9fX1PPLI\nI3vs649DlPfrZiiA2jG1VA+ppqmlieoh1dSOqS12SH2Cm/isJ00bO43Wba2072wH2NUKMG3stEyu\n98ADD/DlL3+ZpUuX7jZMeK5Chyivra1lyZIlTJo0qctyM2fO5NZbb+WJJ57gi1/8Itu2bdu1b3+G\nKO94NTU1FfReC9Xvk0V5WTmzJs5i4tETmTVxViYdZv3Rwf7PbX1bRyvA+teSQSGybAVoaGjg8ssv\nZ+nSpYwY0fVNGf1xiPJ+3QzVYcRhI5h9xuxih9GnuInPelrtmFqefflZGlsaGTt8bI+0AnQMUd7h\ns5/9LHV1dWzZsmVXk86oUaNYunT3R8Ryhyg/5phjuh2i/IILLmDbtm1ExG5DlF966aV84xvf4N57\n7901RPkxxxzDSSedtFsi6cqkSZNYtWoVNTU1lJeXM3nyZL7yla9w9913M2vWLP7t3/6N7du3M336\ndMaPH3+gH9EumQ5RfjB5iPLep629jX//1b/v+s+d1a2OVpr2Z4hy34p9YHrlEOVmbuKzntbRCuBE\ncfC5Gcoy5SY+s77BNQszK5q+0gxeCg70s3ayMLOiqKio4JVXXnHCOAgigldeeYWKior9Poebocys\nKKqqqmhubsYTlx0cFRUVu935ta+cLMysKAYNGkR1dXWxw7ACuRnKzMzycrIwM7O8nCzMzCyvTJOF\npEmSnpG0RtI1XewfJelhSQ2SVkuanLNvnKTlkp6S9ISk/e/GL4JSm9e71OI1s4Mrs2QhqYxketQP\nAGOBGZI6Tw11Hcl0qxNI5
uj+ZnrsQOCHwD9GxInA3wDbs4q1p5XapD+lFq+ZHXxZ1ixOA9ZExPMR\n0QYsAC7oVCaAwelyJfBSuvx+YHVEPA4QEa9ERHuGsfaoUpvXu9TiNbODL8tkcTSwPme9Od2W63rg\no5KaSebqvirdfhwQkpZJ+p2kz2cYZ4/qmPRn5OCRwJuT/jRsaChyZF0rtXjNrDiyTBZdzQzS+VHN\nGcC8iKgCJgM/kDSA5PmPvwY+kv68SNI5e1xAukxSvaT63vJgT6lN+lNq8ZpZcWSZLJqBkTnrVbzZ\nzNThEmAhQEQsByqAYemxj0TEyxHxBkmt45TOF4iI2yOiJiJqhg8fnsFb2HelNulPqcVrZsWRZbJY\nARwrqVpSOUkH9tJOZdYB5wBIGkOSLFqAZcA4SYemnd1nA40ZxtpjDuaMXj2h1OI1s+LILFlExA7g\nSpIv/iaSu56eknSDpI7Zza8GLpX0ODAfmBmJTcBNJAlnFfC7iPjvrGLtaaU2r3epxWtmB59nystI\nqc3oVWrxmlnPKHSmPCcLM7N+zNOqmplZj3GyMDOzvJwszMwsLycLMzPLy8nCzMzycrIwM7O8nCzM\nzCwvJ4sS40mKzKwYnCxKyMpVbUy7cS63LVnBtBvnsnKVJykys4NjYLED6ItWr4bFi2HdOhg1CqZO\nhXHjDvycn/72Il4ZupZRbxnDxra1fPrbi7ht1owDPndWsvgczKw4XLPoYatXw5w5sGkTVFUlP+fM\nSbYfiNsWN/Dy0DqGlY9EgmHlI3l5aB23Le6dkxRl9TmYWXE4WfSwxYth6NDkNWDAm8uLFx/YeZe3\nLuTwQZUMIJmkaABlHD6okuWtvXOSoqw+BzMrDieLHrZuHVRW7r6tsjLZfiDeVTmNLdtb2UkySdFO\n2tmyvZV3VfbOSYqy+hzMrDicLHrYqFHQ2rr7ttbWZPuBuGLqBIZtmszLbeuJgJfb1jNs02SumNo7\nJynK6nMws+JwsuhhU6cm7fObNsHOnW8uT516YOcdNw5uubyWEeXVrPtTEyPKq7nl8tpe22Gc1edg\nZsXh+SwykOVdQKU0SZHvhjLr/XrF5EeSJgFfB8qA70TEVzvtHwV8HxiSlrkmIuo67W8Ero+IOXu7\nVm9KFmZmpaLokx9JKgNuAz4AjAVmSBrbqdh1JHNzTwCmA9/stP9m4KdZxWhmZoXJss/iNGBNRDwf\nEW3AAuCCTmUCGJwuVwIvdeyQdCHwPPBUhjGamVkBskwWRwPrc9ab0225rgc+KqkZqAOuApB0GPDP\nwL9mGJ+ZmRUoy2ShLrZ17iCZAcyLiCpgMvADSQNIksTNEbFlrxeQLpNUL6m+paWlR4I2M7M9ZTk2\nVDMwMme9ipxmptQlwCSAiFguqQIYBrwTuFjS10g6v3dK2hYRt+YeHBG3A7dD0sGdybswM7NMk8UK\n4FhJ1cCLJB3Yf9upzDrgHGCepDFABdASEWd2FJB0PbClc6IwM7ODJ7NmqIjYAVwJLAOaSO56ekrS\nDZKmpMWuBi6V9DgwH5gZfeXBDzOzPsQP5ZmZ9WNFf87CzMz6DicLMzPLyzPlmRngsbxs71yzMDPP\nbGh5OVmYmWc2tLycLMzMMxtaXk4WZuaZDS0vJwsz88yGlpeThZkxbhzMnp30UzQ3Jz9nz/bdUPYm\n3zprZkCSGJwcrDuuWZiZWV4FJQtJiyR9MJ1rwszM+plCv/znkgwv/ntJX5V0QoYxmZlZL1NQsoiI\nByLiI8ApwAvA/ZIelfRxSYOyDNDMzIqv4GYlSW8FZgKfABqAr5Mkj/sziczMzHqNgu6GkrQYOAH4\nAfB/I2JDuutHkjyJhJlZH1forbO3RsRDXe0oZNIMMzMrbYU2Q42RNKRjRdJQSZ/Md5CkSZKekbRG\n0jVd7B8l6WFJDZJWS5qcbj9X0kpJT6Q/31vwOzIzsx5XaM3i0oi4rWMlIjZJuhT4ZncHSCo
DbgPO\nBZqBFZKWRkRjTrHrSObmnitpLFAHjAZeJmnueknSO0jm8T56H96X7QPPY2Bm+RRasxggSR0raSIo\nz3PMacCaiHg+ItqABcAFncoEMDhdrgReAoiIhoh4Kd3+FFAh6ZACY7V94HkMLNfGrRuZ8+gcNm7d\nWOxQClJq8ZayQmsWy4CFkr5F8gX/j8DP8hxzNLA+Z70ZeGenMtcDP5d0FXAY8L4uzlMLNETEnwuM\n1fZB7jwG8ObPxYv7V+3CtStoa29j7oq5NLY0srVtK9eeeS3lZfn+JiyelavauHrJXP7wRiP/9fOt\n/OeF13Lqyb033lJXaM3in4GHgFnAFcCDwOfzHKMutkWn9RnAvIioAiYDP8h9SlzSicB/AJd3eQHp\nMkn1kupbWloKeiO2O89j4NpVh0VNi1i7eS1jho9h7ea1LGpaVOyQurV6NXz624vY2LaWUW8Zw8a2\ntXz624v63b/ZwVToQ3k7I2JuRFwcEbUR8e2IaM9zWDMwMme9irSZKcclwML0GsuBCmAYgKQq4D7g\nYxHxXDdx3R4RNRFRM3z48ELeinXieQw8SxxAw4YG6p6tY+Tg5L/syMEjqXu2joYNDUWOrGu3LW7g\n5aF1DCsfiQTDykfy8tA6blvcO+PtCwodG+pYSfdKapT0fMcrz2ErgGMlVUsqB6YDSzuVWQeck15j\nDEmyaEnvvPpv4NqI+J99eUO2bzyPgWtXAAsbF1JZUUnZgDIAygaUUVlRycLGhUWOrGvLWxdy+KBK\nBpDEO4AyDh9UyfLW3hlvX1BoM9T3SMaH2gG8B7iL5AG9bkXEDuBKkv6OJpK7np6SdIOkKWmxq4FL\nJT0OzAdmRkSkx70d+H+SVqWvEfv43qwAnsfAtSuAaWOn0bqtlfadSYNB+852Wre1Mm3stCJH1rV3\nVU5jy/ZWdpLEu5N2tmxv5V2VvTPevkDJd3OeQtLKiDhV0hMRcVK67VcRcWbmERaopqYm6uv9MLnt\nu44+i6FDkxpFa2tSu+pvSXP+k/NZtmYZo4eM5oXNL3De289jxjtmFDusLq1eDVfMnc8rQ5cxfNBo\nWra/wFs3ncdts2b0q3+znpB+v+d9uLrQmsW2tOP595KulHQR4L/0rU9w7SpRO6aW6iHVNLU0UT2k\nmtoxtcUOqVvjxsEtl9cyoryadX9qYkR5NbdcXtvv/s0OpkJrFhNJmpKGAF8ieTbixoj4TbbhFc41\nC7MDt3HrRu56/C4+Nv5jjDis9/89WGrx9kaF1izyJov0AbyvRsTneiq4LDhZmJntux5rhkpvkT01\n9wluMzPrXwp9grsB+LGke4CtHRsjoh/diW5m1n8VmiyOBF4Bckd/DcDJwsysHygoWUTEx7MOxMzM\neq9CZ8r7HnuO60RE/EOPR2RmZr1Ooc1Q/5WzXAFcxJ7jPJmZWR9VaDPUbsNPSpoPPJBJRGZm1usU\n+gR3Z8cC/WjkHDOz/q3QPovX2b3P4n9J5rgwM7N+oNBmqCOyDsTMzHqvQuezuEhSZc76EEkXZheW\nmZn1JoX2WXwxInaN+B8Rm4EvZhOSmZn1NoUmi67KFXrbrZmZlbhCk0W9pJsk/ZWkt0m6GViZZWBm\nZtZ7FJosrgLagB8BC4E/AVfkO0jSJEnPSFoj6Zou9o+S9LCkBkmrJU3O2Xdtetwzks4rME4zM8tA\noXdDbQX2+LLfm3QejNuAc4FmYIWkpRHRmFPsOpK5uedKGgvUAaPT5enAicBfAg9IOi4dLt3MzA6y\nQu+Gul/SkJz1oZKW5TnsNGBNRDwfEW3AAuCCTmWCZNY9gEreHELkAmBBRPw5ItYCa9LzmZlZERTa\nDDUsvQMKgIjYRP45uI8G1uesN6fbcl0PfFRSM0mt4qp9OBZJl0mql1Tf0tJSyPswM7P9UGiy2Clp\n1/AekkbTxSi0nXQ1s17nY2YA8yKiCpgM/EDSgAKPJSJ
uj4iaiKgZPnx4nnDMzGx/FXr7678Av5b0\nSLp+FnBZnmOagZE561XsOVLtJcAkgIhYLqkCGFbgsWZmdpAUVLOIiJ8BNcAzJHdEXU1yR9TerACO\nlVQtqZykw3pppzLrgHMAJI0hGf68JS03XdIhkqpJBi78bUHvyMzMelyhAwl+Avgnkr/wVwGnA8vZ\nfZrV3UTEDklXAsuAMuC7EfGUpBuA+ohYSpJ07pD0GZJmppkREcBTkhYCjcAO4ArfCWVmVjxKvpvz\nFJKeACYCv4mIkyWdAPxrRHw46wALVVNTE/X19cUOw8yspEhaGRE1+coV2sG9LSK2pSc+JCKeBo4/\nkADNzKx0FNrB3Zw+Z7EEuF/SJtzhbGbWbxT6BPdF6eL1kh4meYDuZ5lFZWZmvco+jxwbEY/kL2Vm\nZn3J/s7BbdbnbNy6kTmPzmHj1o3FDsWs13GyMAPa2tuYu2IuK15cwdwVc2lrbyt2SGa9ipOFGbCo\naRFrN69lzPAxrN28lkVNi4odklmv4mRh/V7Dhgbqnq1j5OBkhJmRg0dS92wdDRsaihyZWe/hZGH9\n3sLGhVRWVFI2oAyAsgFlVFZUsrBxYZEjM+s9nCys35s2dhqt21pp35mMKNO+s53Wba1MGzutyJGZ\n9R5OFtbvTThqApOPm8z615IpVNa/tp7Jx01mwlETihyZWe/hZGEG1I6ppXpINU0tTVQPqaZ2TG2x\nQzLrVZwszIDysnJmTZzFxKMnMmviLMrLyosdklmvss9PcJv1VSMOG8HsM2YXOwyzXsk1CzMzy8vJ\nwszM8nKyMDOzvDJNFpImSXpG0hpJ13Sx/2ZJq9LXs5I25+z7mqSnJDVJ+oYkZRmrmZl1L7MObkll\nwG3AuUAzsELS0oho7CgTEZ/JKX8VMCFdPgN4NzAu3f1r4GzgF1nFa2Zm3cuyZnEasCYino+INmAB\ncMFeys8A5qfLAVQA5cAhwCDgjxnGamZme5FlsjgaWJ+z3pxu24OkY4Bq4CGAiFgOPAxsSF/LIqKp\ni+Muk1Qvqb6lpaWHwzczsw5ZJouu+hiim7LTgXsjoh1A0tuBMUAVSYJ5r6Sz9jhZxO0RURMRNcOH\nD++hsM3MrLMsH8prBkbmrFcBL3VTdjpwRc76RcBvImILgKSfAqcDv8wgTjOzkrR6NSxeDOvWwahR\nMHUqjBuX/7j9kWXNYgVwrKRqSeUkCWFp50KSjgeGAstzNq8DzpY0UNIgks7tPZqhzMz6q9WrYc4c\n2LQJqqqSn3PmJNuzkFmyiIgdwJXAMpIv+oUR8ZSkGyRNySk6A1gQEblNVPcCzwFPAI8Dj0fET7KK\n1cys1CxeDEOHJq8BA95cXrw4m+tlOjZURNQBdZ22faHT+vVdHNcOXJ5lbGZmpWzduqRGkauyMtme\nBT/BbWZWgkaNgtbW3be1tibbs+BkYWZWgqZOTfopNm2CnTvfXJ46NZvrOVmYmZWgceNg9uykn6K5\nOfk5e3Z2d0N5PgszsxI1blx2yaEz1yzMzCwvJwszM8vLycLMzPJysjAzs7ycLMzMLC8nCzMzy8vJ\nwszM8nKyMDOzvJwszMwsLycLMzPLy8nCzMzycrIwM7O8Mk0WkiZJekbSGknXdLH/Zkmr0tezkjbn\n7Bsl6eeSmiQ1ShqdZaxmZta9zEadlVQG3AacCzQDKyQtjYjGjjIR8Zmc8lcBE3JOcRfw5Yi4X9Lh\nwM6sYjUzs73LsmZxGrAmIp6PiDZgAXDBXsrPAOYDSBoLDIyI+wEiYktEvJFhrGZmthdZJoujgfU5\n683ptj1IOgaoBh5KNx0HbJa0WFKDpBvTmoqZmRVBlslCXWyLbspOB+6NiPZ0fSBwJjAbmAi8DZi5\nxwWkyyTVS6pvaWk58IjNzKxLWSaLZmBkznoV8FI3ZaeTNkHlHNuQNmHtAJYAp3Q+KCJuj4iaiKgZ\nPnx4D4VtZmadZZk
sVgDHSqqWVE6SEJZ2LiTpeGAosLzTsUMldWSA9wKNnY81M7ODI7NkkdYIrgSW\nAU3Awoh4StINkqbkFJ0BLIiIyDm2naQJ6kFJT5A0ad2RVaxmZrZ3yvmOLmk1NTVRX19f7DDMzEqK\npJURUZOvnJ/gNjOzvJwszMwsLycLMzPLy8nCzMzycrIwM7O8nCzMzCwvJwszM8vLycLMzPJysjAz\ns7ycLMzMLC8nCzMzy8vJwszM8nKyMDOzvJwszMwsLycLMzPLy8nCzMzycrIwM7O8Mk0WkiZJekbS\nGknXdLH/Zkmr0tezkjZ32j9Y0ouSbs0yTjMz27uBWZ1YUhlwG3Au0AyskLQ0Iho7ykTEZ3LKXwVM\n6HSaLwGPZBWjmZkVJsuaxWnAmoh4PiLagAXABXspPwOY37Ei6VTgL4CfZxijmZkVIMtkcTSwPme9\nOd22B0nHANXAQ+n6AOA/gc9lGJ+ZmRUoy2ShLrZFN2WnA/dGRHu6/kmgLiLWd1M+uYB0maR6SfUt\nLS0HEKqZme1NZn0WJDWJkTnrVcBL3ZSdDlyRs/4u4ExJnwQOB8olbYmI3TrJI+J24HaAmpqa7hKR\nmZkdoCxrFiuAYyVVSyonSQhLOxeSdDwwFFjesS0iPhIRoyJiNDAbuKtzojAzM9i4dSNzHp3Dxq0b\nM71OZskiInYAVwLLgCZgYUQ8JekGSVNyis4AFkSEawZmZvugrb2NuSvmsuLFFcxdMZe29rbMrqW+\n8h1dU1MT9fX1xQ7DzOygmf/kfJatWcboIaN5YfMLnPf285jxjhn7dA5JKyOiJl85P8FtZlaCGjY0\nUPdsHSMHJ13DIwePpO7ZOho2NGRyPScLM7MStLBxIZUVlZQNKAOgbEAZlRWVLGxcmMn1nCzMzErQ\ntLHTaN3WSvvO5ImD9p3ttG5rZdrYaZlcz8nCzKwETThqApOPm8z615LH0da/tp7Jx01mwlGdR03q\nGU4WZmYlqnZMLdVDqmlqaaJ6SDW1Y2ozu5aThZlZiSovK2fWxFlMPHoisybOorysPLNrZfkEt5mZ\nZWzEYSOYfcbszK/jmoWZmeXlZGFmZnk5WZiZWV5OFmZmllefGRtKUgvwh2LH0ckw4OViB7EPSine\nUooVSiveUooVSive3hjrMRExPF+hPpMseiNJ9YUM0NVblFK8pRQrlFa8pRQrlFa8pRRrZ26GMjOz\nvJwszMwsLyeLbN1e7AD2USnFW0qxQmnFW0qxQmnFW0qx7sZ9FmZmlpdrFmZmlpeTRQYkjZT0sKQm\nSU9J+qdix5SPpDJJDZL+q9ix5CNpiKR7JT2dfsbvKnZM3ZH0mfR34ElJ8yVVFDumXJK+K2mjpCdz\nth0p6X5Jv09/Di1mjLm6ife3bf+AAAAHAklEQVTG9HdhtaT7JA0pZowduoo1Z99sSSFpWDFi2x9O\nFtnYAVwdEWOA04ErJI0tckz5/BPQVOwgCvR14GcRcQIwnl4at6SjgU8BNRHxDqAMmF7cqPYwD5jU\nads1wIMRcSzwYLreW8xjz3jvB94REeOAZ4FrD3ZQ3ZjHnrEiaSRwLrDuYAd0IJwsMhARGyLid+ny\n6yRfZkcXN6ruSaoCPgh8p9ix5CNpMHAWcCdARLRFxObiRrVXA4G3SBoIHAq8VOR4dhMRvwRe7bT5\nAuD76fL3gQsPalB70VW8EfHziNiRrv4GqDrogXWhm88W4Gbg80BJdRg7WWRM0mhgAvBYcSPZq1tI\nfnl3FjuQArwNaAG+lzabfUfSYcUOqisR8SIwh+QvyA1Aa0T8vLhRFeQvImIDJH/4ACOKHM+++Afg\np8UOojuSpgAvRsTjxY5lXzlZZEjS4cAi4NMR8Vqx4+mKpPOBjRGxstixFGggcAowNyImAFvpXc0k\nu6Rt/RcA1cBfAodJ+mhxo+q7JP0LSRPw3cWOpSuSDgX+BfhCsWPZH04WGZE0iCRR3
B0Ri4sdz168\nG5gi6QVgAfBeST8sbkh71Qw0R0RHTe1ekuTRG70PWBsRLRGxHVgMnFHkmArxR0lHAaQ/NxY5nrwk\n/T1wPvCR6L3PA/wVyR8Oj6f/36qA30n6P0WNqkBOFhmQJJI29aaIuKnY8exNRFwbEVURMZqk8/Wh\niOi1f/1GxP8C6yUdn246B2gsYkh7sw44XdKh6e/EOfTSzvhOlgJ/ny7/PfDjIsaSl6RJwD8DUyLi\njWLH052IeCIiRkTE6PT/WzNwSvo73es5WWTj3cDfkfyVvip9TS52UH3IVcDdklYDJwNfKXI8XUpr\nP/cCvwOeIPn/1que4JU0H1gOHC+pWdIlwFeBcyX9nuSuna8WM8Zc3cR7K3AEcH/6f+1bRQ0y1U2s\nJctPcJuZWV6uWZiZWV5OFmZmlpeThZmZ5eVkYWZmeTlZmJlZXk4W1u9I+oWkzOdBlvSpdFTcA36i\nOB3WZK+DUUqaJ+niLrb/TSmMJmy928BiB2BWSiQNzBm0Lp9PAh+IiLUHet2I+MSBnmN/SSqLiPZi\nXd96B9csrFeSNDr9q/yOdD6In0t6S7pvV81A0rB06AQkzZS0RNJPJK2VdKWkz6YDDv5G0pE5l/io\npEfTeSZOS48/LJ2DYEV6zAU5571H0k+APQYCTK/xZPr6dLrtWySDHi6V9JlO5WdKWizpZ+mcEV/L\n2fd+Scsl/S695uFdvOdLJD2bbrtD0q05pz8rfV/Pd6plDE7nemiU9C1JA9JzzZD0RBr7f+TEsUXS\nDZIeA94l6avpsaslzdmXf0vrIyLCL7963QsYTTIo3Mnp+kLgo+nyL0jmiAAYBryQLs8E1pA8zTsc\naAX+Md13M8mAjh3H35EunwU8mS5/JecaQ0jmRjgsPW8zcGQXcZ5K8nT2YcDhwFPAhHTfC8CwLo6Z\nCTwPVAIVwB+Akel7+SVwWFrun4Ev5L5nkgEJXwCOBAYBvwJuTcvMA+4h+SNwLLAm3f43wDaS5FVG\nMv/Dxem51qWf1UDgIeDC9JgApqXLRwLP8OZDvEOK/fvh18F/uRnKerO1EbEqXV5JkkDyeTiSOURe\nl9QK/CTd/gQwLqfcfEjmHJA0WMnsau8nGVRxdlqmAhiVLt8fEV3NTfDXwH0RsRVA0mLgTKAhT5wP\nRkRrekwjcAxJghoL/E8ylBTlJMNF5DoNeKQjFkn3AMfl7F8SETuBRkl/kbP9txHxfHrM/DTu7cAv\nIqIl3X43SfJcArSTDIQJ8BpJsvmOpP8G3P/RDzlZWG/255zlduAt6fIO3mxC7TxNae4xO3PWd7L7\n73vncW4CEFAbEc/k7pD0TpKh0Lui7oLPo/N7G5ie6/6ImLGX4/JdL/e8uWW7e7/d2RZpP0VE7Eib\n6s4hGWzySuC9eeKwPsZ9FlaKXiBp/oGkOWV/fBhA0l+TTErUCiwDrkpHiEXShALO80vgwnRk2cOA\ni0iahvbHb4B3S3p7ev1DJR3XqcxvgbMlDVUy+15tgec+TVJ12lfxYeDXJBNynZ32+5QBM4BHOh+Y\n9ptURkQd8GmSwRutn3HNwkrRHGChpL8jaWffH5skPQoMJpldDeBLJLMGrk4TxgskcyR0KyJ+J2ke\nyZc4wHciIl8TVHfnapE0E5gv6ZB083UkfScdZV6U9BWSL/qXSIZnby3g9MtJRo89iSTB3RcROyVd\nCzxMUsuoi4iuhiM/AvixpIq03Ge6KGN9nEedNSsxkg6PiC1pzeI+4LsRcV+x47K+zc1QZqXnekmr\ngCeBtSQd0maZcs3CzMzycs3CzMzycrIwM7O8nCzMzCwvJwszM8vLycLMzPJysjAzs7z+P/CgSz3i\nxVaPAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def mean_cv_accuracy(estimator, X, y, cv):\n", "    \"\"\"Return the mean test-fold accuracy of `estimator` under the cross-validation splitter `cv`.\"\"\"\n", "    scores = cross_validate(estimator, X, y, cv=cv, scoring='accuracy', return_train_score=False)\n", "    return np.mean(scores['test_score'])\n", "\n", "\n", "cv_generator = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)\n", "num_neighbors = [1, 2, 3, 4, 5, 7, 10, 12, 15]\n", "\n", "# One mean accuracy per k, for the L1 (p=1) and the L2 (p=2) distance.\n", "accuracy_l1 = [\n", "    mean_cv_accuracy(KNeighborsClassifier(k, p=1, weights='uniform'), X, y, cv_generator)\n", "    for k in num_neighbors\n", "]\n", "accuracy_l2 = [\n", "    mean_cv_accuracy(KNeighborsClassifier(k, p=2, weights='uniform'), X, y, cv_generator)\n", "    for k in num_neighbors\n", "]\n", "\n", "fig, ax = plt.subplots()\n", "ax.plot(num_neighbors, accuracy_l1, 'ob', alpha=0.5, label='L1 distance')\n", "ax.plot(num_neighbors, accuracy_l2, 'dg', alpha=0.5, label='L2 distance')\n", "ax.set_xlabel('number of neighbors')\n", "ax.set_ylabel('accuracy')\n", "ax.legend();" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.973333333333\n" ] } ], "source": [ "cv_generator = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)\n", "\n", "# Linear SVM scored with the same splitter settings (random_state=0) as the 5-NN baseline.\n", "print(mean_cv_accuracy(SVC(kernel='linear'), X, y, cv_generator))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So, the SVM works a lot better than the nearest neighbor classifier. Let's see if PCA will help the nearest neighbor classifier."
] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "scipy.sparse.csr.csr_matrix" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(X)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Densify into a plain ndarray under a new name: toarray() avoids the np.matrix that\n", "# todense() returns, and leaving X untouched keeps this cell safe to re-run.\n", "X_dense = X.toarray()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0.22568728 0.11655105 0.05409513 0.03533554 0.03003318 0.02801815\n", " 0.02467637 0.02206176 0.0191093 0.01810437 0.01618755 0.01578355\n", " 0.01457776 0.01412232 0.01314588 0.01234478 0.0113869 0.01122882\n", " 0.0099001 0.00957027]\n" ] }, { "data": { "text/plain": [ "(72, 20)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Exploratory fit on the full data, only to inspect how much variance the\n", "# 20 leading components capture. random_state pins the result in case PCA\n", "# selects a randomized SVD solver.\n", "pca = PCA(n_components=20, random_state=0)\n", "X_reduced = pca.fit_transform(X_dense)\n", "\n", "print(pca.explained_variance_ratio_)\n", "X_reduced.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cv_generator = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)\n", "n_neighbors = 5\n", "# Fit PCA inside every training fold (via a pipeline) so the held-out fold never\n", "# influences the projection; projecting all samples before splitting leaks test data.\n", "classifier = make_pipeline(\n", "    PCA(n_components=20, random_state=0),\n", "    KNeighborsClassifier(n_neighbors, p=2, weights='uniform')\n", ")\n", "cv_results = cross_validate(classifier, X_dense, y, cv=cv_generator, scoring='accuracy', return_train_score=False)\n", "np.mean(cv_results['test_score'])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.5" } }, "nbformat": 4, "nbformat_minor": 1 }