Here's code for ridge regression:
"""
An implementation of ridge regression.

This is a simpler version than the one in PyML (see
classifiers/ridgeRegression).  It works with the PyVectorDataSet container.
"""

import numpy as np

from PyML.classifiers.baseClassifiers import Classifier
from PyML.evaluators import resultsObjects


class RidgeRegression(Classifier):
    """
    An implementation of ridge regression

    :Keywords:
      - `ridge` -- the ridge parameter [default: 10.0]
      - `kernel` -- a kernel object [default: Linear]
        (NOTE: nothing in this class reads a kernel; the weight vector is
        always computed directly from ``data.X``)
      - `regression` -- whether to use the object for regression
        [default: False]; in its default (False), it is used as a classifier
      - `fit_bias` -- whether to incorporate a bias term [default: True]

    After training, the learned weights are available as ``self.w`` and the
    bias term as ``self.bias`` (0.0 when `fit_bias` is False).
    """

    attributes = {'ridge': 10, 'regression': False, 'fit_bias': True}

    def __init__(self, arg=None, **args):
        """Construct the object; arguments are forwarded to ``Classifier``."""
        Classifier.__init__(self, arg, **args)
        if self.regression:
            # In regression mode results are collected as regression results,
            # and "classifying" an example simply returns its real-valued
            # prediction.
            self.resultsObject = resultsObjects.RegressionResults
            self.classify = self.decisionFunc

    def train(self, data, **args):
        """
        Fit the weight vector (and optionally the bias) on `data`.

        Solves ``(X^T X + ridge * I) w = X^T Y`` for ``w``.

        :Parameters:
          - `data` -- the training set; must expose ``X``, ``labels``,
            ``numFeatures``, ``addFeature`` and ``eliminateFeatures``

        Raises ``ValueError`` when used as a classifier on data that does
        not have exactly two classes.
        """
        Classifier.train(self, data, **args)
        if not self.regression and data.labels.numClasses != 2:
            # Call form of ``raise``: valid on both Python 2 and Python 3.
            raise ValueError("not a binary classification problem")

        # The bias is fit as the weight of a temporary constant feature
        # appended as the last column of the data.  Note that this means the
        # ridge penalty is applied to the bias weight as well.
        if self.fit_bias:
            data.addFeature('bias', [1.0] * len(data))
        try:
            self.w = np.zeros(data.numFeatures)
            self.bias = 0.0
            Y = np.array(data.labels.Y)
            if not self.regression:
                # Rescale the class labels so that the decision threshold
                # used in ``classify`` is 0 (assumes labels are encoded as
                # 0/1, which become -1/+1 -- TODO confirm against the
                # dataset container).
                Y = Y * 2 - 1
            gram = data.X.T.dot(data.X)
            self.w = np.linalg.solve(
                gram + self.ridge * np.eye(data.numFeatures),
                data.X.T.dot(Y))
            # there are alternative ways of computing the weight vector
            # which are not as computationally efficient:
            # self.w = np.dot(np.linalg.inv(data.X.T.dot(data.X)),
            #                 data.X.T.dot(Y))
            # self.w = np.dot(np.linalg.pinv(data.X), Y)
        finally:
            # Always restore the caller's dataset, even if solving failed,
            # so a failed fit does not leave the extra column behind.
            if self.fit_bias:
                data.eliminateFeatures([data.numFeatures - 1])

        if self.fit_bias:
            # The last weight belongs to the temporary bias feature.
            self.bias = self.w[-1]
            self.w = self.w[:-1]

        # this should be the last command in the train function
        self.log.trainingTime = self.getTrainingTime()

    def decisionFunc(self, data, i):
        """Return the real-valued prediction ``w . x_i + bias`` for example `i`."""
        return np.dot(self.w, data.X[i]) + self.bias

    def classify(self, data, i):
        """
        Classify example `i` of `data`.

        Returns a tuple ``(classification, score)`` where ``score`` is the
        decision function value and ``classification`` is 1 when the score
        is positive and 0 otherwise.
        """
        score = self.decisionFunc(data, i)
        classification = 1 if score > 0 else 0
        return (classification, score)
Now let's play with the code.
from PyML import *
import ridge_regression

# regression=True makes the object predict real values instead of class labels.
rr = ridge_regression.RidgeRegression(
    regression=True,
)
We are going to use ridge regression for regression, so we have to set the regression flag to True.
Next we will read in some data taken from the UCI machine learning repository. The task is to predict the position in the body at which a CT scan slice was taken. Here's a link to the data (the UCI "Relative location of CT slices on axial axis" data set).
# The label is stored in the last column of the CSV file and is read as a
# number rather than as a class name.
data = vectorDatasets.PyVectorDataSet(
    '../data/slice_localization_data.csv',
    labelsColumn=-1,
    numericLabels=True,
)
Note that we had to tell PyML to interpret the labels as numeric.
Evaluating the model (used here as a regressor) with cross-validation:
# Evaluate rr on the data; `cv` is inherited from PyML's Classifier
# (presumably cross-validation with PyML's default settings -- confirm the
# fold count in the PyML documentation).
results = rr.cv(data)
A couple of other things to do with the data:
from matplotlib import pyplot as plt

# Histogram of the target values, to see how the labels are distributed.
plt.hist(data.labels.Y, bins=50)

# Fit on the full data set, then look at the distribution of the entries of
# the learned weight vector.
rr.train(data)
plt.hist(rr.w, bins=25)
Using ridge regression as a classifier:
from PyML import *
from matplotlib import pyplot as plt

import perceptron
import ridge_regression

# The class label is stored in the first column of this file.
data = vectorDatasets.PyVectorDataSet(
    '../data/gisette_sample.data',
    labelsColumn=0,
)

# Ridge regression with no arguments, i.e. without the regression flag set.
rr = ridge_regression.RidgeRegression()
rr.train(data)
plt.hist(rr.w, bins=100)

# A perceptron trained on the same data, with its weights plotted the same way.
p = perceptron.Perceptron()
p.train(data)
plt.hist(p.w, bins=100)

# compare accuracy of ridge regression and the perceptron
perceptron_results = p.stratifiedCV(data)
ridge_results = rr.stratifiedCV(data)