CS545 fall 2016

Ridge Regression

Here's code for ridge regression:

import numpy as np
from PyML.classifiers.baseClassifiers import Classifier
from PyML.evaluators import resultsObjects
 
"""
An implementation of ridge regression.
This is a simpler version than the one in PyML (see classifiers/ridgeRegression).
It works with the PyVectorDataSet container
"""
 
class RidgeRegression(Classifier):

    """
    An implementation of ridge regression.

    The weight vector is obtained by solving the regularized normal
    equations ``(X^T X + ridge * I) w = X^T y`` directly on ``data.X``.
    This simplified version works on the explicit feature matrix and
    does not reference a kernel.

    :Keywords:
      - `ridge` -- the ridge parameter [default: 10]
      - `regression` -- whether to use the object for regression [default: False]
        in its default (False), it is used as a binary classifier
      - `fit_bias` -- whether to incorporate a bias term [default: True]

    After training, ``self.w`` holds the feature weights and ``self.bias``
    the bias term (0.0 when `fit_bias` is False).
    """

    attributes = {'ridge': 10,
                  'regression': False,
                  'fit_bias': True}

    def __init__(self, arg=None, **args):
        """
        Construct the model; `arg` and the keyword arguments are forwarded
        to ``Classifier.__init__``.
        """
        Classifier.__init__(self, arg, **args)
        if self.regression:
            # In regression mode results are collected as regression results,
            # and the per-example prediction is the raw decision value:
            # the instance attribute shadows the `classify` method below.
            self.resultsObject = resultsObjects.RegressionResults
            self.classify = self.decisionFunc

    def train(self, data, **args):
        """
        Fit the weight vector (and bias, if requested) on `data`.

        Raises ValueError when used as a classifier on a dataset whose
        number of classes is not 2.

        When `fit_bias` is set, a constant feature is temporarily appended
        to `data` and removed again before returning, even if solving the
        linear system fails.
        """
        Classifier.train(self, data, **args)

        if not self.regression and data.labels.numClasses != 2:
            # call syntax is valid on both Python 2 and Python 3
            raise ValueError("not a binary classification problem")

        if self.fit_bias:
            # the bias is learned as the weight of an all-ones feature
            data.addFeature('bias', [1.0] * len(data))

        self.w = np.zeros(data.numFeatures)
        self.bias = 0.0

        try:
            Y = np.array(data.labels.Y)
            if not self.regression:
                # maps labels to -1/+1, assuming the two classes are stored
                # as 0 and 1 (consistent with what `classify` returns)
                Y = Y * 2 - 1
            # Note: the ridge penalty is applied to every column, so when
            # fit_bias is set the bias weight is regularized as well.
            regularized_gram = (data.X.T.dot(data.X)
                                + self.ridge * np.eye(data.numFeatures))
            self.w = np.linalg.solve(regularized_gram, data.X.T.dot(Y))
            # Solving the linear system is preferred over the alternatives
            # below, which are not as computationally efficient (and which
            # compute the unregularized least-squares solution):
            #   np.dot(np.linalg.inv(data.X.T.dot(data.X)), data.X.T.dot(Y))
            #   np.dot(np.linalg.pinv(data.X), Y)
        finally:
            if self.fit_bias:
                # always restore the caller's dataset to its original features
                data.eliminateFeatures([data.numFeatures - 1])

        if self.fit_bias:
            # the last weight belongs to the temporary bias feature
            self.bias = self.w[-1]
            self.w = self.w[:-1]

        # this should be the last command in the train function
        self.log.trainingTime = self.getTrainingTime()

    def decisionFunc(self, data, i):
        """Return the linear score ``w . x_i + bias`` for example `i` of `data`."""
        return np.dot(self.w, data.X[i]) + self.bias

    def classify(self, data, i):
        """
        Classify example `i` of `data`.

        Returns a ``(classification, score)`` tuple where the classification
        is 1 if the decision value is positive and 0 otherwise.
        """
        score = self.decisionFunc(data, i)
        classification = 1 if score > 0 else 0
        return (classification, score)

Now let's play with the code.

# The star import is kept as-is: later snippets use names such as
# vectorDatasets without importing them, presumably relying on it.
from PyML import *
import ridge_regression
# regression=True makes the object report the raw decision value for each
# example and collect results as regression results.
rr = ridge_regression.RidgeRegression(regression=True)

We are going to use ridge regression for regression, so we have to set the regression flag to True.

Next we will read in some data taken from the UCI machine learning repository. The task is to predict the location in the body from which a CT scan slice was taken. Here's a link to the data.

# Load the CSV file into a PyVectorDataSet container.
# labelsColumn=-1 presumably selects the last column as the label -- confirm
# against the PyML documentation; numericLabels=True keeps labels numeric.
data = vectorDatasets.PyVectorDataSet('../data/slice_localization_data.csv', labelsColumn = -1, numericLabels=True)

Note that we had to tell PyML to interpret the labels as numeric.

Evaluating the regressor using cross-validation:

# Cross-validate the model on the dataset and keep the results object
# (the number of folds is presumably PyML's default -- not set here).
results = rr.cv(data)

A couple of other things to do with the data:

# How are the labels distributed? Histogram of the label values with 50 bins.
from matplotlib import pyplot as plt
plt.hist(data.labels.Y, 50)
 
# Looking at the weight vector: train on the full dataset, then plot a
# histogram of the learned weights with 25 bins.
 
rr.train(data)
plt.hist(rr.w, 25)

Using ridge regression as a classifier:

from PyML import *
# Here the labels are read from the first column (labelsColumn = 0).
data = vectorDatasets.PyVectorDataSet('../data/gisette_sample.data', labelsColumn = 0)
import ridge_regression
# With the default regression=False the model acts as a binary classifier.
rr = ridge_regression.RidgeRegression()
rr.train(data)
 
# Histogram of the ridge regression weights (100 bins).
from matplotlib import pyplot as plt
plt.hist(rr.w, 100)
 
# Train a perceptron on the same data for comparison; `perceptron` is a
# separate module that is not shown on this page.
import perceptron
p = perceptron.Perceptron()
p.train(data)
 
# Histogram of the perceptron weights (100 bins).
plt.hist(p.w, 100)
 
# compare accuracy of ridge regression and the perceptron
# using stratified cross-validation on the same dataset
 
perceptron_results = p.stratifiedCV(data)
ridge_results = rr.stratifiedCV(data)

CS545 fall 2016

User Tools

Site Tools

Sidebar

Ridge Regression

Page Tools