This shows you the differences between two versions of the page.
— |
code:pca [2016/11/03 14:21] (current) asa created |
||
---|---|---|---|
Line 1: | Line 1: | ||
+ | === Principal Components Analysis (PCA) === | ||
+ | <file python pca.py> | ||
+ | |||
+ | import numpy as np | ||
+ | import matplotlib.pyplot as plt | ||
+ | |||
+ | from sklearn import datasets | ||
+ | from sklearn.svm import SVC | ||
+ | from sklearn import cross_validation | ||
+ | from sklearn.decomposition import PCA | ||
+ | from sklearn import preprocessing | ||
+ | |||
+ | digits = datasets.load_digits() | ||
+ | X = digits.data | ||
+ | y = digits.target | ||
+ | |||
+ | # if you want to standardize the data, uncomment the following lines | ||
+ | #scaler = preprocessing.StandardScaler().fit(X) | ||
+ | #X = scaler.transform(X) | ||
+ | |||
+ | pca = PCA(n_components=10) | ||
+ | X_reduced = pca.fit_transform(X) | ||
+ | |||
+ | print (pca.explained_variance_ratio_) | ||
+ | |||
+ | # a scatter-plot in the space of the principal components: | ||
+ | |||
+ | plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, cmap=plt.cm.Paired) | ||
+ | |||
+ | # let's see if this feature representation is useful: | ||
+ | X /= X.max() | ||
+ | |||
+ | from sklearn.grid_search import GridSearchCV | ||
+ | |||
+ | param_grid = [ | ||
+ | {'C': [1, 10, 100], 'kernel': ['linear']}, | ||
+ | {'C': [1, 10, 100], 'gamma': [0.01, 0.001, 0.0001], 'kernel': ['rbf']}, | ||
+ | ] | ||
+ | classifier = GridSearchCV(estimator=SVC(), param_grid=param_grid) | ||
+ | |||
+ | cv = cross_validation.StratifiedKFold(y, 5, shuffle=True, random_state=0) | ||
+ | # accuracy with all the features: | ||
+ | print (np.mean(cross_validation.cross_val_score(classifier, X, y, cv=cv))) | ||
+ | # accuracy with the PCA features: | ||
+ | print (np.mean(cross_validation.cross_val_score(classifier, X_reduced, y, cv=cv))) | ||
+ | |||
+ | </file> |