This shows you the differences between two versions of the page.
code:ridge_regression [2013/09/19 12:32] asa |
code:ridge_regression [2016/08/09 10:25] |
||
---|---|---|---|
Line 1: | Line 1: | ||
- | ===== Ridge Regression ===== | ||
- | |||
- | |||
- | <file python ridge_regression.py> | ||
- | |||
- | import numpy as np | ||
- | from PyML.classifiers.baseClassifiers import Classifier | ||
- | from PyML.evaluators import resultsObjects | ||
- | |||
- | """ | ||
- | An implementation of ridge regression. | ||
- | This is a simpler version than the one in PyML (see classifiers/ridgeRegression). | ||
- | It works with the PyVectorDataSet container | ||
- | """ | ||
- | |||
class RidgeRegression (Classifier) :

    """
    An implementation of ridge regression.

    Training solves the regularized normal equations
    (X^T X + ridge * I) w = X^T y for the weight vector w.

    :Keywords:
      - `ridge` -- the ridge (regularization) parameter [default: 10.0]
      - `regression` -- whether to use the object for regression [default: False]
        in its default (False), it is used as a classifier
      - `fit_bias` -- whether to incorporate a bias term [default: True]

    """

    attributes = {'ridge': 10,
                  'regression' : False,
                  'fit_bias' : True}

    def __init__(self, arg=None, **args) :
        """Construct a RidgeRegression object; keywords as in the class docstring."""
        Classifier.__init__(self, arg, **args)
        if self.regression :
            # in regression mode report regression-style results and expose
            # the raw decision function as the classify method
            self.resultsObject = resultsObjects.RegressionResults
            self.classify = self.decisionFunc


    def train(self, data, **args) :
        """Train on `data` by solving the regularized normal equations.

        Raises ValueError when used as a classifier on a problem that is
        not binary.
        """
        Classifier.train(self, data, **args)

        if not self.regression and data.labels.numClasses != 2 :
            # call form of raise -- valid on both Python 2 and 3
            # (the original `raise ValueError, msg` is a SyntaxError on py3)
            raise ValueError("not a binary classification problem")

        if self.fit_bias :
            # absorb the bias term into w by appending a constant feature
            data.addFeature('bias', [1.0 for i in range(len(data))])

        self.w = np.zeros(data.numFeatures)
        self.bias = 0.0

        Y = np.array(data.labels.Y)
        if not self.regression :
            # map 0/1 class labels to -1/+1 regression targets
            Y = Y * 2 - 1
        # solve (X^T X + ridge*I) w = X^T Y; solving the linear system is
        # cheaper and more numerically stable than forming an explicit inverse
        self.w = np.linalg.solve(data.X.T.dot(data.X) + self.ridge * np.eye(data.numFeatures),
                                 data.X.T.dot(Y))
        # there are alternative ways of computing the weight vector which are not
        # as computationally efficient:
        #self.w = np.dot(np.linalg.inv(data.X.T.dot(data.X)), X.T.dot(Y))
        #self.w = np.dot(np.linalg.pinv(data.X), Y)
        if self.fit_bias :
            # undo the mutation of `data` and split the bias out of w
            data.eliminateFeatures([data.numFeatures - 1])
            self.bias = self.w[-1]
            self.w = self.w[:-1]

        # this should be the last command in the train function
        self.log.trainingTime = self.getTrainingTime()


    def decisionFunc(self, data, i) :
        """Return the real-valued score w . x_i + bias for example i of `data`."""
        return np.dot(self.w, data.X[i]) + self.bias

    def classify(self, data, i) :
        """Return (predicted class in {0, 1}, score) for example i.

        Used only in classification mode; in regression mode __init__
        rebinds `classify` to `decisionFunc`.
        """
        score = self.decisionFunc(data, i)
        classification = 1 if score > 0 else 0
        return (classification, score)
- | |||
- | </file> | ||
- | |||
- | Now let's play with the code. | ||
- | |||
- | <code python> | ||
- | from PyML import * | ||
- | import ridge_regression | ||
- | rr = ridge_regression.RidgeRegression(regression=True) | ||
- | </code> | ||
- | We are going to use ridge regression for regression, so we have to set the regression flag to True. | ||
- | |||
- | Next we will read in some data taken from the UCI machine learning repository. The task is to predict | ||
- | where in the body a CT scan is obtained from. Here's a [[http://archive.ics.uci.edu/ml/datasets/Relative+location+of+CT+slices+on+axial+axis|link to the data]]. | ||
- | |||
- | <code python> | ||
- | data = vectorDatasets.PyVectorDataSet('../data/slice_localization_data.csv', labelsColumn = -1, numericLabels=True) | ||
- | </code> | ||
- | Note that we had to tell PyML to interpret the labels as numeric. | ||
- | |||
- | Evaluating the classifier: | ||
- | <code python> | ||
- | results = rr.cv(data) | ||
- | </code> | ||
- | |||
- | A couple of other things to do with the data: | ||
- | |||
- | <code python> | ||
- | # how are the labels distributed? | ||
- | from matplotlib import pyplot as plt | ||
- | plt.hist(data.labels.Y, 50) | ||
- | |||
- | # Looking at the weight vectors | ||
- | |||
- | rr.train(data) | ||
- | plt.hist(rr.w, 25) | ||
- | </code> | ||
- | |||
- | Using ridge regression as a classifier: | ||
- | <code python> | ||
- | from PyML import * | ||
- | data = vectorDatasets.PyVectorDataSet('../data/gisette_sample.data', labelsColumn = 0) | ||
- | import ridge_regression | ||
- | rr = ridge_regression.RidgeRegression() | ||
- | rr.train(data) | ||
- | |||
- | from matplotlib import pyplot as plt | ||
- | plt.hist(rr.w, 100) | ||
- | |||
- | import perceptron | ||
- | p = perceptron.Perceptron() | ||
- | p.train(data) | ||
- | |||
- | plt.hist(p.w, 100) | ||
- | |||
- | # compare accuracy of ridge regression and the perceptron | ||
- | |||
- | perceptron_results = p.stratifiedCV(data) | ||
- | ridge_results = rr.stratifiedCV(data) | ||
- | |||
- | </code> | ||