Darwin  1.10(beta)
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
drwnMultiClassLogistic.h
Go to the documentation of this file.
1 /******************************************************************************
2 ** DARWIN: A FRAMEWORK FOR MACHINE LEARNING RESEARCH AND DEVELOPMENT
3 ** Distributed under the terms of the BSD license (see the LICENSE file)
4 ** Copyright (c) 2007-2015, Stephen Gould
5 ** All rights reserved.
6 **
7 ******************************************************************************
8 ** FILENAME: drwnMultiClassLogistic.h
9 ** AUTHOR(S): Stephen Gould <stephen.gould@anu.edu.au>
10 **
11 *****************************************************************************/
12 
13 #pragma once
14 
15 #include <cstdlib>
16 #include <vector>
17 
18 #include "drwnBase.h"
19 #include "drwnFeatureMaps.h"
20 #include "drwnClassifier.h"
21 #include "drwnOptimizer.h"
22 
23 using namespace std;
24 
26 
27 // drwnMultiClassLogisticBase -----------------------------------------------
29 
31  public:
32  static double REG_STRENGTH;
33  static int MAX_ITERATIONS;
34 
35  protected:
36  VectorXd _theta;
38  double _lambda;
39 
40  // cached data for parameter estimation
41  // TODO: change to drwnDataset when ownership flag is implemented
42  const vector<vector<double> > *_features;
43  const vector<int> *_targets;
44  const vector<double> *_weights;
45 
46  public:
50  drwnMultiClassLogisticBase(unsigned n, unsigned k = 2);
54 
55  // access functions
56  virtual const char *type() const { return "drwnMultiClassLogistic"; }
57 
58  // i/o
59  virtual bool save(drwnXMLNode& xml) const;
60  virtual bool load(drwnXMLNode& xml);
61 
62  // training
64  virtual double train(const drwnClassifierDataset& dataset);
65  virtual double train(const vector<vector<double> >& features,
66  const vector<int>& targets);
67  virtual double train(const vector<vector<double> >& features,
68  const vector<int>& targets, const vector<double>& weights);
69 
70  // evaluation (log-probability)
72  virtual void getClassScores(const vector<double>& features,
73  vector<double>& outputScores) const = 0;
74 
75  protected:
76  // drwnOptimizer interface
77  double objective(const double *x) const;
78  void gradient(const double *x, double *df) const;
79  virtual double objectiveAndGradient(const double *x, double *df) const = 0;
80 };
81 
82 // drwnTMultiClassLogistic -----------------------------------------------------
90 
91 template<class FeatureMap = drwnBiasJointFeatureMap>
93  public:
97  drwnTMultiClassLogistic(unsigned n, unsigned k = 2) :
98  drwnMultiClassLogisticBase(n, k) { initialize(n, k); }
101  drwnMultiClassLogisticBase(c) { /* do nothing */ }
102 
103  ~drwnTMultiClassLogistic() { /* do nothing */ }
104 
105  // access
107  return new drwnTMultiClassLogistic<FeatureMap>(*this);
108  }
109 
110  // initialization
111  virtual void initialize(unsigned n, unsigned k = 2);
112 
113  // evaluation (log-probability)
115  virtual void getClassScores(const vector<double>& features,
116  vector<double>& outputScores) const;
117 
118  protected:
119  virtual double objectiveAndGradient(const double *x, double *df) const;
120 };
121 
122 // drwnMultiClassLogistic ------------------------------------------------------
126 
128 
129 // drwnTMultiClassLogistic implementation --------------------------------------
130 
131 template<class FeatureMap>
133 {
135  const FeatureMap phi(_nFeatures, _nClasses);
136  const int m = phi.numParameters();
137  if (m == 0) {
138  _theta = VectorXd();
139  } else {
140  _theta = VectorXd::Zero(phi.numParameters());
141  }
142 }
143 
144 template<class FeatureMap>
145 void drwnTMultiClassLogistic<FeatureMap>::getClassScores(const vector<double>& features,
146  vector<double>& outputScores) const
147 {
148  DRWN_ASSERT((int)features.size() == _nFeatures);
149 
151  vector<double> t(_theta.rows());
152  Eigen::Map<VectorXd>(&t[0], t.size()) = _theta;
153 
154  const FeatureMap phi(_nFeatures, _nClasses);
155  outputScores.resize(_nClasses);
156  for (int k = 0; k < _nClasses; k++) {
157  outputScores[k] = phi.dot(t, features, k);
158  }
159 }
160 
161 template<class FeatureMap>
162 double drwnTMultiClassLogistic<FeatureMap>::objectiveAndGradient(const double *x, double *df) const
163 {
164  double negLogL = 0.0;
165  int numTerms = 0;
166 
167  const FeatureMap phi(_nFeatures, _nClasses);
168  vector<double> p(_nClasses);
169 
170  const vector<double> vx(x, x + _n);
171  vector<double> vdf(_n, 0.0);
172 
173  for (unsigned n = 0; n < _features->size(); n++) {
174  if ((*_targets)[n] < 0) continue; // skip missing labels
175  double alpha = (_weights == NULL) ? 1.0 : (*_weights)[n];
176 
177  // compute marginal for training sample
178  double maxValue = 0.0;
179  for (int k = 0; k < _nClasses; k++) {
180  p[k] = phi.dot(vx, (*_features)[n], k);
181  maxValue = std::max(maxValue, p[k]);
182  }
183 
184  // exponentiate and normalize
185  double Z = 0.0;
186  for (vector<double>::iterator it = p.begin(); it != p.end(); ++it) {
187  Z += (*it = exp(*it - maxValue));
188  }
189 
190  // increment log-likelihood
191  negLogL -= alpha * log(p[(*_targets)[n]] / Z);
192  numTerms += 1;
193 
194  // increment derivative
195  p[(*_targets)[n]] -= Z;
196  for (int k = 0; k < _nClasses; k++) {
197  phi.mac(vdf, (*_features)[n], alpha * p[k] / Z, k);
198  }
199  }
200 
201  memcpy((void *)df, (void *)&vdf[0], _n * sizeof(double));
202 
203  if (numTerms == 0) return 0.0;
204  negLogL /= (double)numTerms;
205  Eigen::Map<VectorXd>(df, _n) /= (double)numTerms;
206 
207  // regularization
208  switch (_regularizer) {
209  case 0: // sum-of-squares
210  {
211  double weightNorm = 0.0;
212  for (unsigned i = 0; i < _n; i++) {
213  weightNorm += x[i] * x[i];
214  df[i] += _lambda * x[i];
215  }
216 
217  negLogL += 0.5 * _lambda * weightNorm;
218  }
219  break;
220 
221  case 1: // huber
222  {
223  double dh;
224  for (unsigned i = 0; i < _n; i++) {
225  negLogL += _lambda * drwn::huberFunctionAndDerivative(x[i], &dh, 1.0e-3);
226  df[i] += _lambda * dh;
227  }
228  }
229  break;
230 
231  default:
232  DRWN_LOG_ERROR("unsupported regularizer " << _regularizer);
233  }
234 
235  return negLogL;
236 }
virtual double train(const drwnClassifierDataset &dataset)=0
train the parameters of the classifier from a drwnClassifierDataset object
virtual void getClassScores(const vector< double > &features, vector< double > &outputScores) const
compute the unnormalized log-probability for a single feature vector
Definition: drwnMultiClassLogistic.h:145
virtual void getClassScores(const vector< double > &features, vector< double > &outputScores) const =0
compute the unnormalized log-probability for a single feature vector
virtual const char * type() const
returns object type as a string (e.g., Foo::type() { return "Foo"; })
Definition: drwnMultiClassLogistic.h:56
drwnTMultiClassLogistic()
default constructor
Definition: drwnMultiClassLogistic.h:95
virtual void initialize(unsigned n, unsigned k=2)
initialize the classifier object for n features and k classes
Definition: drwnClassifier.cpp:47
static double REG_STRENGTH
default strength of regularizer (used during construction)
Definition: drwnMultiClassLogistic.h:32
drwnTMultiClassLogistic(unsigned n, unsigned k=2)
construct a k-class logistic classifier for data of dimension n
Definition: drwnMultiClassLogistic.h:97
drwnTMultiClassLogistic drwnMultiClassLogistic
Convenience type declaration for multi-class logistic classifier with default feature mapping...
Definition: drwnMultiClassLogistic.h:127
virtual double objectiveAndGradient(const double *x, double *df) const
returns value of objective function and populates gradient df at point x
Definition: drwnMultiClassLogistic.h:162
Common functionality for drwnMultiClassLogistic.
Definition: drwnMultiClassLogistic.h:30
Interface for solving large-scale unconstrained optimization problems using L-BFGS.
Definition: drwnOptimizer.h:68
Implements the interface for a generic machine learning classifier.
Definition: drwnClassifier.h:31
drwnTMultiClassLogistic(const drwnTMultiClassLogistic< FeatureMap > &c)
copy constructor
Definition: drwnMultiClassLogistic.h:100
VectorXd _theta
joint feature map weights
Definition: drwnMultiClassLogistic.h:36
Implements a multi-class logistic classifier templated on a drwnJointFeatureMap.
Definition: drwnMultiClassLogistic.h:92
virtual void initialize(unsigned n, unsigned k=2)
initialize the classifier object for n features and k classes
Definition: drwnMultiClassLogistic.h:132
static int MAX_ITERATIONS
maximum number of training iterations
Definition: drwnMultiClassLogistic.h:33
Implements a cacheable dataset containing feature vectors, labels and optional weights.
Definition: drwnDataset.h:43
virtual void getClassScores(const vector< double > &features, vector< double > &outputScores) const =0
compute the unnormalized log-probability for a single feature vector
int _regularizer
regularization option
Definition: drwnMultiClassLogistic.h:37
double _lambda
regularization strength
Definition: drwnMultiClassLogistic.h:38
virtual drwnTMultiClassLogistic< FeatureMap > * clone() const
returns a copy of the class usually implemented as virtual Foo* clone() { return new Foo(*this); } ...
Definition: drwnMultiClassLogistic.h:106