Darwin  1.10(beta)
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
drwnClassificationResults.h
1 /******************************************************************************
2 ** DARWIN: A FRAMEWORK FOR MACHINE LEARNING RESEARCH AND DEVELOPMENT
3 ** Distributed under the terms of the BSD license (see the LICENSE file)
4 ** Copyright (c) 2007-2015, Stephen Gould
5 ** All rights reserved.
6 **
7 ******************************************************************************
8 ** FILENAME: drwnClassificationResults.h
9 ** AUTHOR(S): Stephen Gould <stephen.gould@anu.edu.au>
10 **
11 *****************************************************************************/
12 
13 #pragma once
14 
15 #include <cstdlib>
16 #include <vector>
17 #include <map>
18 
19 #include "drwnBase.h"
20 #include "drwnDataset.h"
21 #include "drwnClassifier.h"
22 
23 using namespace std;
24 
25 // drwnClassificationResults ------------------------------------------------
47 
49  public:
50  static bool INCLUDE_MISSES;
51 
52  protected:
54  map<double, pair<int, int> > _scoredResults;
57  double _posWeight;
58 
59  public:
64  virtual ~drwnClassificationResults();
65 
67  inline int numPositives() const { return _numPositiveSamples; }
69  inline int numNegatives() const { return _numNegativeSamples; }
71  inline int numSamples() const { return _numPositiveSamples + _numNegativeSamples; }
73  inline int numThresholds() const { return (int)_scoredResults.size(); }
75  inline int numMisses() const;
76 
78  inline double getPosWeight() const { return _posWeight; }
80  inline void setPosWeight(double w) { DRWN_ASSERT(w > 0.0); _posWeight = w; }
81 
84  inline void normalize();
85 
86  // i/o
88  void clear();
90  bool write(const char *filename) const;
92  bool read(const char *filename);
93 
94  // modify the statistics
96  void accumulate(const drwnClassificationResults& c);
99  void accumulate(const drwnClassifierDataset& dataset,
100  drwnClassifier const *classifier, int positiveClassId = 1);
102  void accumulatePositives(double score, int count = 1);
104  void accumulatePositives(const vector<double>& scores);
106  void accumulateNegatives(double score, int count = 1);
108  void accumulateNegatives(const vector<double> &scores);
109 
111  void accumulateMisses(int count = 1);
112 
113  // reduce the number of points in the curve aggregating nearby scores
114  // TODO: void quantize(double minThreshold = 0.0, double maxThreshold = 1.0, int numBins = 100);
115 };
116 
117 // drwnPRCurve --------------------------------------------------------------
121 
123  public:
125  drwnPRCurve();
128  virtual ~drwnPRCurve();
129 
131  vector<pair<double, double> > getCurve() const;
133  void writeCurve(const char *filename) const;
135  double averagePrecision(unsigned numPoints = 11) const;
136 };
137 
138 // drwnClassificationResults inline functions -------------------------------
139 
141 {
142  if ((_numPositiveSamples > 0) && (_numNegativeSamples > 0)) {
143  _posWeight = (double)_numNegativeSamples / (double)_numPositiveSamples;
144  } else {
145  _posWeight = 1.0;
146  }
147 }
148 
150  int count = _numPositiveSamples;
151  for (map<double, pair<int, int> >::const_iterator it = _scoredResults.begin();
152  it != _scoredResults.end(); it++) {
153  count -= it->second.first;
154  }
155  return count;
156 }
void setPosWeight(double w)
set the relative weight of a positive sample to a negative sample
Definition: drwnClassificationResults.h:80
static bool INCLUDE_MISSES
true if some positive samples are never scored
Definition: drwnClassificationResults.h:50
int numSamples() const
return the total number (positive and negative) of samples accumulated
Definition: drwnClassificationResults.h:71
int numThresholds() const
return the number of unique classification scores
Definition: drwnClassificationResults.h:73
map< double, pair< int, int > > _scoredResults
number of positives (first) and negatives (second) grouped by score
Definition: drwnClassificationResults.h:54
int numPositives() const
return the number of positive samples accumulated
Definition: drwnClassificationResults.h:67
int numMisses() const
return the number of positive samples that have not been scored
Definition: drwnClassificationResults.h:149
Precision-recall curve.
Definition: drwnClassificationResults.h:122
void normalize()
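this will change the weight of the positive examples to (number of negatives) / (number of positives), so that overall the positive and negative examples carry equal total weight; the weight is reset to 1.0 if either count is zero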
Definition: drwnClassificationResults.h:140
int numNegatives() const
return the number of negative samples accumulated
Definition: drwnClassificationResults.h:69
Implements the interface for a generic machine learning classifier.
Definition: drwnClassifier.h:31
double getPosWeight() const
return the relative weight of a positive sample to a negative sample
Definition: drwnClassificationResults.h:78
double _posWeight
weight of positive-to-negative count
Definition: drwnClassificationResults.h:57
int _numPositiveSamples
must be greater than or equal to sum(_scoredResults.first); the difference is the number of misses
Definition: drwnClassificationResults.h:55
int _numNegativeSamples
must be equal to sum(_scoredResults.second)
Definition: drwnClassificationResults.h:56
Encapsulates summary of classifier output from which various curves can be generated (e.g., precision-recall curves).
Definition: drwnClassificationResults.h:48
Implements a cacheable dataset containing feature vectors, labels and optional weights.
Definition: drwnDataset.h:43