Darwin  1.10(beta)
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
drwnHistogram.h
1 /******************************************************************************
2 ** DARWIN: A FRAMEWORK FOR MACHINE LEARNING RESEARCH AND DEVELOPMENT
3 ** Distributed under the terms of the BSD license (see the LICENSE file)
4 ** Copyright (c) 2013, Jason Corso, Stephen Gould
5 ** All rights reserved.
6 **
7 ******************************************************************************
8 ** FILENAME: drwnHistogram.h
9 ** AUTHOR(S): Jason Corso <jcorso@buffalo.edu>
10 ** Stephen Gould <stephen.gould@anu.edu.au>
11 ** DESCRIPTION:
12 ** Implements a simple templated histogram class.
13 **
14 *****************************************************************************/
15 
16 #pragma once
17 
18 #include <cstdio>
19 #include <cstdlib>
20 #include <cstring>
21 #include <math.h>
22 
23 #include "drwnBase.h"
24 
25 // drwnPCA class --------------------------------------------------------------
27 
28 template <typename TYPE>
30  protected:
31  double _mass;
32  int _numBins;
33  TYPE * _bins;
34  TYPE _min;
35  TYPE _max;
36 
37  // more transient values used in locating bins, etc...
38  TYPE _alpha;
39  TYPE _beta;
40 
41  public:
43  drwnHistogram(int n, double minVal, double maxVal) :
44  _mass(0.0), _numBins(n), _bins(NULL), _min(minVal), _max(maxVal) {
45  DRWN_ASSERT(minVal < maxVal);
46  _bins = (TYPE *)malloc(sizeof(TYPE)*n);
47  memset(_bins, 0, sizeof(TYPE)*n);
48  _alpha = ((double)_numBins) / (_max - _min);
49  _beta = 1.0 / _alpha;
50  }
51 
54  _mass(h._mass), _numBins(h._numBins), _bins(NULL), _min(h._min), _max(h._max),
55  _alpha(h._alpha), _beta(h._beta) {
56  _bins = (TYPE *)malloc(sizeof(TYPE) * _numBins);
57  memcpy(_bins, h._bins,sizeof(TYPE) * _numBins);
58  }
59 
62  if (_bins != NULL) {
63  free(_bins);
64  _bins = NULL;
65  }
66  }
67 
68  inline void addSample(TYPE s) {
69  addWeightedSample(s, 1.0);
70  }
71 
72  void addWeightedSample(TYPE s, double w) {
73  const int binIndex = computeBin(s);
74 
75  if (_mass == 0.0) {
76  _bins[binIndex] = 1;
77  _mass = w;
78  } else {
79  const double oldmass = _mass;
80  _mass += w;
81  for (int i = 0; i < _numBins; i++) {
82  if (i == binIndex) {
83  _bins[i] = (oldmass * _bins[i] + w) / _mass;
84  } else {
85  _bins[i] = oldmass * _bins[i] / _mass;
86  }
87  }
88  }
89  }
90 
91  void appendToFile(FILE* fp) const {
92  fprintf(fp,"%d\n", _numBins);
93  for (int i = 0; i < _numBins; i++) {
94  fprintf(fp, "%lf%s", (double)_bins[i], (i == _numBins - 1) ? "\n" : " ");
95  }
96  }
97 
101  double chiSquared(const drwnHistogram& H) const {
102  DRWN_ASSERT(H._numBins == _numBins);
103  double chi = 0.0;
104 
105  for (int i = 0; i < _numBins; i++) {
106  const double a = _bins[i] + H._bins[i];
107  if (a == 0.0) continue;
108 
109  double b = _bins[i] - H._bins[i];
110  chi += b*b / a;
111  }
112  return 0.5 * chi;
113  }
114 
115  void clear() {
116  _mass = 0.0;
117  memset(_bins, 0, sizeof(TYPE)*_numBins);
118  }
119 
120  int computeBin(const TYPE& s) const {
121  int binIndex;
122 
123  if (s <= _min) {
124  binIndex = 0;
125  } else if (s >= _max) {
126  binIndex = _numBins - 1;
127  } else {
128  binIndex = std::max((int)(_alpha * (s - _min)), _numBins - 1);
129  }
130 
131  return binIndex;
132  }
133 
134  double entropy() const {
135  double entropy = 0.0;
136  for (int i = 0; i < _numBins; i++) {
137  entropy -= (_bins[i] == 0) ? 0.0 : _bins[i] * log((double)_bins[i]);
138  }
139  return entropy;
140  }
141 
142  void convertInternalToCDF() {
143  for (int i = 1; i < _numBins; i++) {
144  _bins[i] += _bins[i-1];
145  }
146  }
147 
148  double euclidean(const drwnHistogram& H) const {
149  DRWN_ASSERT(H._numBins == _numBins);
150  double euc = 0.0;
151 
152  for (int i = 0; i < _numBins; i++) {
153  euc += (_bins[i] - H._bins[i]) * (_bins[i] - H._bins[i]);
154  }
155 
156  return sqrt(euc);
157  }
158 
159  double getBinCenter(int b) const { return _min + _beta*(double)b + (_beta / 2.0); }
160  double getBinLeft(int b) const { return _min + _beta*(double)b; }
161  double getBinRight(int b) const { return _min + _beta*(double)(b+1); }
162  double getBinMass(int i) const { return _bins[i] * _mass; }
163  double getBinWeight(int i) const { return _bins[i]; }
164 
165  double getBinWeightMax() const {
166  double m = getBinWeight(0);
167  for (int i = 1; i < _numBins; i++) {
168  m = std::max(m, getBinWeight(i));
169  }
170  return m;
171  }
172 
173  double getBinWeightMax(int &binIndex) const {
174  double m = getBinWeight(0);
175  binIndex = 0;
176  for (int i = 1; i < _numBins; i++) {
177  const double mm = getBinWeight(i);
178  if (mm > m) {
179  binIndex = i;
180  m = mm;
181  }
182  }
183 
184  return m;
185  }
186 
187  double getBinWeightSum() const {
188  double sum = 0.0;
189  for (int i = 0; i < _numBins; i++) {
190  sum += getBinWeight(i);
191  }
192  return sum;
193  }
194 
195  double getLikelihood(const TYPE& d) const { return _bins[computeBin(d)]; }
196 
197  double getMass() const { return _mass; }
198  double getMax() const { return _max; }
199  double getMin() const { return _min; }
200  int getNumberOfBins() const { return _numBins; }
201 
202  double intersect(const drwnHistogram& H) const {
203  DRWN_ASSERT(_numBins == H._numBins);
204  double val = 0.0;
205 
206  for (int i = 0; i < _numBins; i++) {
207  val += std::min(_bins[i], H._bins[i]);
208  }
209 
210  return val;
211  }
212 
214  double klDistance(const drwnHistogram& H) const {
215  return 0.5 * (klDivergence(H) + H.klDivergence(*this));
216  }
217 
219  double klDivergence(const drwnHistogram& H) const {
220  double d = 0.0;
221  for (int i = 0; i < _numBins; i++) {
222  if ((_bins[i] != 0) && (H._bins[i] != 0)) {
223  d += _bins[i] * log(_bins[i]/H._bins[i]);
224  }
225  }
226  return d;
227  }
228 
229  double l1distance(const drwnHistogram& H) const {
230  double d = 0.0;
231  for (int i = 0; i < _numBins; i++) {
232  d += fabs(_bins[i] - H._bins[i]);
233  }
234  return d;
235  }
236 
237  void mergeHistogram(const drwnHistogram& H) {
238  for (int i = 0; i < _numBins; i++) {
239  _bins[i] = (_bins[i] + H._bins[i]) / 2.0;
240  }
241  _mass += H.getMass();
242  }
243 
246  void mergeWeightedHistogram(const drwnHistogram& H, double w) {
247  for (int i = 0; i < _numBins; i++) {
248  _bins[i] += H._bins[i] * w;
249  }
250  _mass += w;
251  }
252 
253  void setAndNormalize(const drwnHistogram* H) {
254  double invSum = 1.0 / H->getBinWeightSum();
255  for (int i = 0; i < _numBins; i++) {
256  _bins[i] = invSum * H->_bins[i];
257  }
258  _mass = H->_mass;
259  }
260 };
261 
void mergeWeightedHistogram(const drwnHistogram &H, double w)
only works for the case that the initial histogram here is 0 mass
Definition: drwnHistogram.h:246
~drwnHistogram()
destructor
Definition: drwnHistogram.h:61
double chiSquared(const drwnHistogram &H) const
compute and return the chi-squared distance between the two histograms d(x,y) = sum( (xi-yi)^2 / (xi+...
Definition: drwnHistogram.h:101
double klDivergence(const drwnHistogram &H) const
compute the one-way kldivergence from this to H
Definition: drwnHistogram.h:219
double klDistance(const drwnHistogram &H) const
compute the symmetric kl Difference (averaging the two assymetric)
Definition: drwnHistogram.h:214
drwnHistogram(int n, double minVal, double maxVal)
constructor
Definition: drwnHistogram.h:43
drwnHistogram(const drwnHistogram &h)
copy constructor
Definition: drwnHistogram.h:53
Implements a simple templated histogram class.
Definition: drwnHistogram.h:29