{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Cross Validation\n", "\n", "As a first step we need a classifier." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn import cross_validation\n", "from sklearn import metrics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Scikit-learn has datasets that are already ready for use:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_breast_cancer\n", "data = load_breast_cancer()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A scikit-learn data object is a container object whose interesting attributes are: \n", " * ‘data’, the data to learn, \n", " * ‘target’, the classification labels, \n", " * ‘target_names’, the meaning of the labels,\n", " * ‘feature_names’, the meaning of the features, and \n", " * ‘DESCR’, the full description of the dataset.\n", "\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['malignant', 'benign'], \n", " dtype='