This shows you the differences between two versions of the page.
Both sides previous revision · Previous revision | |||
code:feature_selection [2016/08/09 10:25] 127.0.0.1 external edit |
code:feature_selection [2016/11/01 14:29] (current) asa |
||
---|---|---|---|
Line 5: | Line 5: | ||
<file python feature_selection.py> | <file python feature_selection.py> | ||
+ | |||
""" | """ | ||
================================================= | ================================================= | ||
Line 30: | Line 31: | ||
# for the features (any linear classifier will work): | # for the features (any linear classifier will work): | ||
classifier = LinearSVC() | classifier = LinearSVC() | ||
- | selector = RFE(classifier, step=0.1,n_features_to_select=25) | + | selector = RFE(classifier, step=0.1, n_features_to_select=25) |
# run feature selection: | # run feature selection: | ||
selector = selector.fit(X, y) | selector = selector.fit(X, y) | ||
Line 43: | Line 44: | ||
# the wrong way to perform cross-validation: | # the wrong way to perform cross-validation: | ||
cv = cross_validation.StratifiedKFold(y, 5, shuffle=True, random_state=0) | cv = cross_validation.StratifiedKFold(y, 5, shuffle=True, random_state=0) | ||
- | print np.mean(cross_validation.cross_val_score(classifier, Xt, y, cv=cv)) | + | print (np.mean(cross_validation.cross_val_score(classifier, Xt, y, cv=cv))) |
# now let's perform nested cross-validation: | # now let's perform nested cross-validation: | ||
Line 50: | Line 51: | ||
rfe_svm = make_pipeline(selector, classifier) | rfe_svm = make_pipeline(selector, classifier) | ||
- | print np.mean(cross_validation.cross_val_score(rfe_svm, X, y, cv=cv)) | + | print (np.mean(cross_validation.cross_val_score(rfe_svm, X, y, cv=cv))) |
# feature selection using a univariate filter method: | # feature selection using a univariate filter method: | ||
Line 58: | Line 59: | ||
filter_svm = make_pipeline(filter_selector, classifier) | filter_svm = make_pipeline(filter_selector, classifier) | ||
- | print np.mean(cross_validation.cross_val_score(filter_svm, X, y, cv=cv)) | + | print (np.mean(cross_validation.cross_val_score(filter_svm, X, y, cv=cv))) |
</file> | </file> |