# NOTE: a stray DokuWiki/PHP server warning that leaked into this page export
# was removed here — it was never part of the script.
# Train/evaluate an SVM on the heart dataset and demonstrate cross-validation.
#
# NOTE(update): ported from the `sklearn.cross_validation` module (deprecated
# in 0.18, removed in 0.20) to `sklearn.model_selection`, and from Python 2
# `print` statements to the print() function.
import numpy as np
from sklearn import model_selection
from sklearn import svm
from sklearn import metrics

# Load the dataset: first column is the label, remaining columns are features.
data = np.genfromtxt("../data/heart_scale.data", delimiter=",")
X = data[:, 1:]
y = data[:, 0]

# Train/test an SVM with a single held-out split (40% test, fixed seed).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.4, random_state=0)
classifier = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
print(classifier.score(X_test, y_test))

# Now use cross-validation instead:
print(model_selection.cross_val_score(classifier, X, y, cv=5,
                                      scoring='accuracy'))

# You can obtain scores for other metrics, such as area under the ROC curve:
print(model_selection.cross_val_score(classifier, X, y, cv=5,
                                      scoring='roc_auc'))

# You can also obtain the predictions by cross-validation and then compute
# the accuracy yourself.  (The original left this expression bare, so its
# value was silently discarded in a script; print it so the demo shows it.)
y_predict = model_selection.cross_val_predict(classifier, X, y, cv=5)
print(metrics.accuracy_score(y, y_predict))

# Here's an alternative way of doing cross-validation: first divide the data
# into folds explicitly, then hand those folds to cross_val_score.
# The modern StratifiedKFold takes n_splits (not y) and yields the fold
# indices via .split(X, y).
cv = model_selection.StratifiedKFold(n_splits=5)
print(model_selection.cross_val_score(classifier, X, y, cv=cv,
                                      scoring='roc_auc'))

# The old `cv.test_folds` attribute is gone; reconstruct the per-example fold
# assignment from .split() to see how examples were divided into folds:
test_folds = np.empty(len(y), dtype=int)
for fold, (_, test_idx) in enumerate(cv.split(X, y)):
    test_folds[test_idx] = fold
print(test_folds)

# Hmm... perhaps we should shuffle things a bit.  If you run the division
# into folds multiple times with shuffle=True you will get different answers:
cv = model_selection.StratifiedKFold(n_splits=5, shuffle=True)

# If you want to consistently get the same division into folds, set the seed
# for the random number generator:
cv = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
# Grid search: perform model selection over SVM hyper-parameters.
#
# NOTE(update): ported from the `sklearn.grid_search` module (deprecated in
# 0.18, removed in 0.20) to `sklearn.model_selection`, and to Python 3
# print() calls.  Relies on `np`, `svm`, `X`, `y` defined earlier in the file.
from sklearn.model_selection import GridSearchCV, cross_val_score

# Search a log-spaced grid of C values (10^-2 .. 10^3) for a linear SVM.
Cs = np.logspace(-2, 3, 6)
classifier = GridSearchCV(estimator=svm.LinearSVC(), param_grid=dict(C=Cs))
classifier.fit(X, y)

# Print the best accuracy, estimator, and parameter setting found:
print(classifier.best_score_)
print(classifier.best_estimator_)
print(classifier.best_params_)

# Nested cross-validation: the grid search itself is evaluated by an outer
# cross-validation loop, giving an unbiased estimate of the tuned model.
print(cross_val_score(classifier, X, y, cv=5))

# If we want to do grid search over multiple parameters (and kernels):
param_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
classifier = GridSearchCV(estimator=svm.SVC(), param_grid=param_grid)
print(cross_val_score(classifier, X, y, cv=5))