# NOTE: a stray DokuWiki/PHP server warning that leaked into this page export
# was removed here — it was never part of the script.
# Train/evaluate an SVM on the heart dataset and demonstrate cross-validation.
#
# NOTE(update): ported from the `sklearn.cross_validation` module (deprecated
# in 0.18, removed in 0.20) to `sklearn.model_selection`, and from Python 2
# `print` statements to the print() function.
import numpy as np
from sklearn import model_selection
from sklearn import svm
from sklearn import metrics

# Load the dataset: first column is the label, remaining columns are features.
data = np.genfromtxt("../data/heart_scale.data", delimiter=",")
X = data[:, 1:]
y = data[:, 0]

# Train/test an SVM with a single held-out split (40% test, fixed seed).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.4, random_state=0)
classifier = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
print(classifier.score(X_test, y_test))

# Now use cross-validation instead:
print(model_selection.cross_val_score(classifier, X, y, cv=5,
                                      scoring='accuracy'))

# You can obtain scores for other metrics, such as area under the ROC curve:
print(model_selection.cross_val_score(classifier, X, y, cv=5,
                                      scoring='roc_auc'))

# You can also obtain the predictions by cross-validation and then compute
# the accuracy yourself.  (The original left this expression bare, so its
# value was silently discarded in a script; print it so the demo shows it.)
y_predict = model_selection.cross_val_predict(classifier, X, y, cv=5)
print(metrics.accuracy_score(y, y_predict))

# Here's an alternative way of doing cross-validation: first divide the data
# into folds explicitly, then hand those folds to cross_val_score.
# The modern StratifiedKFold takes n_splits (not y) and yields the fold
# indices via .split(X, y).
cv = model_selection.StratifiedKFold(n_splits=5)
print(model_selection.cross_val_score(classifier, X, y, cv=cv,
                                      scoring='roc_auc'))

# The old `cv.test_folds` attribute is gone; reconstruct the per-example fold
# assignment from .split() to see how examples were divided into folds:
test_folds = np.empty(len(y), dtype=int)
for fold, (_, test_idx) in enumerate(cv.split(X, y)):
    test_folds[test_idx] = fold
print(test_folds)

# Hmm... perhaps we should shuffle things a bit.  If you run the division
# into folds multiple times with shuffle=True you will get different answers:
cv = model_selection.StratifiedKFold(n_splits=5, shuffle=True)

# If you want to consistently get the same division into folds, set the seed
# for the random number generator:
cv = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
# Grid search: perform model selection over SVM hyper-parameters.
#
# NOTE(update): ported from the `sklearn.grid_search` module (deprecated in
# 0.18, removed in 0.20) to `sklearn.model_selection`, and to Python 3
# print() calls.  Relies on `np`, `svm`, `X`, `y` defined earlier in the file.
from sklearn.model_selection import GridSearchCV, cross_val_score

# Search a log-spaced grid of C values (10^-2 .. 10^3) for a linear SVM.
Cs = np.logspace(-2, 3, 6)
classifier = GridSearchCV(estimator=svm.LinearSVC(), param_grid=dict(C=Cs))
classifier.fit(X, y)

# Print the best accuracy, estimator, and parameter setting found:
print(classifier.best_score_)
print(classifier.best_estimator_)
print(classifier.best_params_)

# Nested cross-validation: the grid search itself is evaluated by an outer
# cross-validation loop, giving an unbiased estimate of the tuned model.
print(cross_val_score(classifier, X, y, cv=5))

# If we want to do grid search over multiple parameters (and kernels):
param_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
classifier = GridSearchCV(estimator=svm.SVC(), param_grid=param_grid)
print(cross_val_score(classifier, X, y, cv=5))