User Tools

Site Tools


code:pca

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

code:pca [2016/11/03 14:21] (current)
asa created
Line 1: Line 1:
 +=== Principal Components Analysis (PCA) ===
  
 +<file python pca.py>
 +
 +import numpy as np
 +import matplotlib.pyplot as plt
 +
 +from sklearn import datasets
 +from sklearn.svm import SVC
 +from sklearn import cross_validation
 +from sklearn.decomposition import PCA
 +from sklearn import preprocessing
 +
 +digits = datasets.load_digits()
 +X = digits.data
 +y = digits.target
 +
 +# if you want to standardize the data, uncomment the following lines
 +#scaler = preprocessing.StandardScaler().fit(X)
 +#X = scaler.transform(X)
 +
 +pca = PCA(n_components=10)
 +X_reduced = pca.fit_transform(X)
 +
 +print (pca.explained_variance_ratio_)
 +
 +# a scatter-plot in the space of the principal components:
 +
 +plt.scatter(X_reduced[:,​ 0], X_reduced[:,​ 1], c=y, cmap=plt.cm.Paired)
 +
 +# let's see if this feature representation is useful:
 +X /= X.max()
 +
 +from sklearn.grid_search import GridSearchCV
 +
 +param_grid = [
 +  {'​C':​ [1, 10, 100], '​kernel':​ ['​linear'​]},​
 +  {'​C':​ [1, 10, 100], '​gamma':​ [0.01, 0.001, 0.0001], '​kernel':​ ['​rbf'​]},​
 + ]
 +classifier = GridSearchCV(estimator=SVC(),​ param_grid=param_grid)
 +
 +cv = cross_validation.StratifiedKFold(y,​ 5, shuffle=True,​ random_state=0)
 +# accuracy with all the features:
 +print (np.mean(cross_validation.cross_val_score(classifier,​ X, y, cv=cv)))
 +# accuracy with the PCA features:
 +print (np.mean(cross_validation.cross_val_score(classifier,​ X_reduced, y, cv=cv)))
 +
 +</file>
code/pca.txt · Last modified: 2016/11/03 14:21 by asa