30 using namespace Eigen;
32 #if defined(_WIN32)||defined(WIN32)||defined(__WIN32__)
// Smallest element of v. The definition later in this file reports an empty
// vector through DRWN_LOG_FATAL("invalid size").
// NOTE(review): the template header for this declaration is not visible in
// this listing.
47 inline T minElem(
const vector<T>& v);
// Largest element of v; the definition handles an empty vector the same way
// as minElem.
51 inline T maxElem(
const vector<T>& v);
// Mean of v. The definition asserts that v is non-empty and returns an
// accumulated sum divided by T(v.size()).
55 T mean(
const vector<T>& v);
// Median of v, taken by const reference. For an even number of elements the
// definition returns T(0.5 * (sum of the two middle elements)).
59 T median(
const vector<T>& v);
// Median computed directly on w; the definition calls nth_element on w, so
// the caller's vector is reordered.
64 T destructive_median(vector<T>& w);
// NOTE(review): presumably the most frequent element of v -- the visible part
// of the definition counts occurrences in a map<T, int>; confirm the
// tie-breaking rule against the full source.
68 T mode(
const vector<T>& v);
// Variance of v. The definition asserts a non-empty v and divides by
// T(v.size()) (i.e. by N, not N - 1).
72 T variance(
const vector<T>& v);
// Square root of variance(v); the definition returns 0.0 when the variance is
// not strictly positive.
76 T stdev(
const vector<T>& v);
// Index of the smallest element of v. In the definition an empty vector
// yields -1, and the visible comparisons keep the earlier index on ties.
80 int argmin(
const vector<T>& v);
// Overload for an Eigen VectorXd; its definition is not part of this listing.
83 int argmin(
const VectorXd &v);
// Applies argmin to every inner vector: one index per entry of v.
87 vector<int> argmins(
const vector<vector<T> >& v);
// Index of the largest element of v. In the definition an empty vector
// yields -1, and the visible comparisons keep the earlier index on ties.
91 int argmax(
const vector<T>& v);
// Overload for an Eigen VectorXd; its definition is not part of this listing.
94 int argmax(
const VectorXd &v);
// Applies argmax to every inner vector: one index per entry of v.
98 vector<int> argmaxs(
const vector<vector<T> >& v);
// NOTE(review): presumably draws a random index using the entries of v as
// weights -- neither definition is visible here; confirm before relying on it.
102 int argrand(
const vector<double>& v);
105 int argrand(
const VectorXd &v);
// Excess kurtosis of v. The definition sums fourth powers of the deviations
// from mu, divides by v.size() * variance^2 and subtracts 3.0.
108 template <
typename T>
109 T excessKurtosis(
const vector<T> &v);
// Returns one float per element of v; the definition pushes
// float(sum) / float(v.size()) inside a loop over v.
// NOTE(review): what `sum` counts is on lines missing from this listing.
111 template <
typename T>
112 vector<float> percentiles(
const vector<T> & v);
// (minimum, maximum) of the elements of v as a pair; the definition asserts
// that v is non-empty.
115 template <
typename T>
116 pair<T, T> range(
const vector<T>& v);
// Overall (minimum, maximum) across all inner vectors; the definition starts
// from range(*v.begin()) and widens it with the range of each further entry.
119 template <
typename T>
120 pair<T, T> range(
const vector<vector<T> >& v);
// Builds a vector from v driven by the indices in indx (the definition
// reserves indx.size() entries and iterates over indx).
// NOTE(review): presumably copies v[indx[k]] in order -- the copy statement is
// not visible in this listing.
123 template <
typename T>
124 vector<T> extractSubVector(
const vector<T>& v,
const vector<int>& indx);
// Keeps keepSize elements of v, chosen from the middle of the ordering induced
// by scores; scores must have the same length as v (asserted in the
// definition).
127 template <
typename T>
128 vector<T> removeOutliers(
const vector<T>& v,
129 const vector<double>& scores,
int keepSize);
// Set of subsets of s; the definition inserts the empty set and recurses on
// smaller sets.
132 template <
typename T>
133 set<set<T> > powerset(
const set<T>& s);
// Rounds n up to a multiple of d; n is returned unchanged when n % d == 0
// (see the inline definition at the end of this file).
136 int roundUp(
int n,
int d);
// NOTE(review): definition not visible here; presumably reports whether v
// holds non-finite values (NaN/inf) -- confirm against the implementation.
139 bool containsInvalidEntries(
const vector<double> &v);
// NOTE(review): the definitions of logistic, entropy, gini and
// expAndNormalize are not part of this listing; the remarks below describe
// the signatures only and hedge anything beyond them.

// Logistic function of parameters theta and features data (vector form).
// Presumably evaluated on the inner product of the two -- confirm.
142 double logistic(
const vector<double>& theta,
const vector<double>& data);
// Raw-pointer form of the same function; n is presumably the number of
// entries in both arrays.
144 double logistic(
const double *theta,
const double *data,
int n);
// Entropy from a vector of doubles named p.
147 double entropy(
const std::vector<double>& p);
// Entropy from a vector of integers named counts.
149 double entropy(
const std::vector<int>& counts);
// Gini measure from a vector of doubles named p.
152 double gini(
const std::vector<double>& p);
// Gini measure from a vector of integers (the parameter is named p here too).
154 double gini(
const std::vector<int>& p);
// Takes v by non-const reference, so presumably exponentiates and normalizes
// it in place; the meaning of the returned double is not visible here.
157 double expAndNormalize(std::vector<double>& v);
159 double expAndNormalize(VectorXd& v);
// Approximate exp; the definition returns 0.0 for arguments below -700.0.
// The parameter is named x here and y in the definition.
162 inline double fastexp(
double x);
// NOTE(review): definition not visible; presumably a random ordering of the
// integers 0..n-1 -- confirm.
165 vector<int> randomPermutation(
int n);
// Reorders v in place; the definition swaps v[i] with v[i + j] where
// j = rand() % (n - i).
168 template <
typename T>
169 void shuffle(vector<T>& v);
// Random subset of v. The definition returns v itself when n >= v.size() and
// an empty vector when n == 0.
172 template <
typename T>
173 vector<T> subSample(
const vector<T>& v,
size_t n);
// NOTE(review): definitions of the two functions below are not visible;
// presumably n values between startValue and endValue with linear spacing.
// n defaults to 10.
176 vector<double> linSpaceVector(
double startValue,
double endValue,
unsigned n = 10);
// As above, presumably with logarithmic spacing. n defaults to 10.
178 vector<double> logSpaceVector(
double startValue,
double endValue,
unsigned n = 10);
// NOTE(review): none of these four definitions is visible in this listing.
// Each takes `array` by non-const reference and so presumably steps it in
// place to the previous/next configuration, treating it as a counter whose
// digits are bounded by `limit` -- confirm the exact bound semantics.

// Single bound shared by every position.
183 void predecessor(std::vector<int>& array,
int limit);
187 void successor(std::vector<int>& array,
int limit);
// A vector of bounds, presumably one per position of `array`.
191 void predecessor(std::vector<int>& array,
const std::vector<int>& limits);
195 void successor(std::vector<int>& array,
const std::vector<int>& limits);
// Huber-style penalty with threshold m (default 1.0). In the definition the
// value is m * (2|x| - m) once |x| exceeds m; the branch for |x| <= m is not
// visible in this listing.
199 inline double huberFunction(
double x,
double m = 1.0);
// Derivative of the penalty: -2m for x < -m and 2m for x > m in the
// definition. m defaults to 1.0.
201 inline double huberDerivative(
double x,
double m = 1.0);
// Returns the penalty value; df is presumably an output that receives the
// derivative (the stores through df are not visible in this listing).
203 inline double huberFunctionAndDerivative(
double x,
double *df,
double m = 1.0);
// NOTE(review): none of the definitions below is visible in this listing;
// remarks are limited to the signatures.

// Distance between p and q. Both are taken by non-const reference; whether
// they are modified cannot be told from here.
207 double bhattacharyyaDistance(std::vector<double>& p, std::vector<double>& q);
// Squared Euclidean distance between p and q (same non-const signature).
210 double euclideanDistanceSq(std::vector<double>& p, std::vector<double>& q);
// Sum over a vector of doubles.
213 double sum(
const vector<double> &v);
// Sum over a raw array of `length` doubles.
215 double sum(
const double *v,
size_t length);
// Dot product over two raw arrays of `length` doubles.
218 double dot(
const double *x,
const double *y,
size_t length);
// Dot product over two vectors; presumably they must have equal length.
220 double dot(
const vector<double>& x,
const vector<double>& y);
// Returns the smallest element of v.
// NOTE(review): this listing omits several source lines (the switch header,
// the declaration of minObj and the final return), so only the visible
// statements are described.
225 template <
typename T>
226 T drwn::minElem(
const vector<T> & v)
// An empty vector is reported through DRWN_LOG_FATAL.
229 case 0: DRWN_LOG_FATAL(
"invalid size");
break;
// One or two elements are answered directly, without the scan below.
230 case 1:
return v.front();
break;
231 case 2:
return std::min(v.front(), v.back());
break;
// General case: walk from the second element, keeping the running minimum.
235 for (
typename vector<T>::const_iterator i = v.begin() + 1; i != v.end(); ++i) {
236 minObj = std::min(minObj, *i);
// Returns the largest element of v.
// NOTE(review): this listing omits several source lines (the switch header,
// the declaration of maxObj and the final return), so only the visible
// statements are described.
242 template <
typename T>
243 T drwn::maxElem(
const vector<T> & v)
// An empty vector is reported through DRWN_LOG_FATAL.
246 case 0: DRWN_LOG_FATAL(
"invalid size");
break;
// One or two elements are answered directly, without the scan below.
247 case 1:
return v.front();
break;
248 case 2:
return std::max(v.front(), v.back());
break;
// General case: walk from the second element, keeping the running maximum.
252 for (
typename vector<T>::const_iterator i = v.begin() + 1; i != v.end(); ++i) {
253 maxObj = std::max(maxObj, *i);
// Mean of the elements of v.
// NOTE(review): the declaration of `sum` and the loop body are on lines
// missing from this listing; presumably each element is added to `sum`.
259 template <
typename T>
260 T drwn::mean(
const vector<T>& v)
// Precondition: v must not be empty (the division below uses v.size()).
262 DRWN_ASSERT(v.size() > 0);
266 for (
typename vector<T>::const_iterator i = v.begin(); i != v.end(); ++i) {
// The element count is converted to T, so the division is carried out in T.
270 return sum / T(v.size());
// Median of v, using nth_element rather than a full sort.
// NOTE(review): the declaration of w is not visible in this listing;
// presumably it is a working copy of v, since v is const. The return for the
// odd-size branch is also on a missing line.
273 template <
typename T>
274 T drwn::median(
const vector<T>& v)
276 DRWN_ASSERT(v.size() > 0);
// Odd size: select the single middle element.
279 if (w.size() % 2 == 1) {
280 int ix = w.size() / 2;
281 nth_element(w.begin(), w.begin()+ix, w.end());
// Even size: place the upper-middle element at ix_sup, then select the largest
// element of the lower part [begin, ix_sup) into position ix_sup - 1.
285 int ix_sup = w.size()/2;
286 nth_element(w.begin(), w.begin() + ix_sup, w.end());
287 nth_element(w.begin(), w.begin() + ix_sup - 1, w.begin()+ ix_sup);
// Average of the two middle elements, converted back to T.
288 return T(0.5 * ( w[ix_sup] + w[ix_sup-1] ));
// Median of w computed in place: nth_element reorders the caller's vector.
// NOTE(review): the return for the odd-size branch is on a line missing from
// this listing.
292 template <
typename T>
293 T drwn::destructive_median(vector<T> &w)
295 DRWN_ASSERT(w.size() > 0);
// Odd size: select the single middle element.
296 if (w.size() % 2 == 1) {
297 int ix = w.size() / 2;
298 nth_element(w.begin(), w.begin()+ix, w.end());
// Even size: place the upper-middle element at ix_sup, then select the largest
// element of the lower part [begin, ix_sup) into position ix_sup - 1.
302 int ix_sup = w.size()/2;
303 nth_element(w.begin(), w.begin() + ix_sup, w.end());
304 nth_element(w.begin(), w.begin() + ix_sup - 1, w.begin()+ ix_sup);
// Average of the two middle elements, converted back to T.
305 return T(0.5 * ( w[ix_sup] + w[ix_sup-1] ));
// Finds the element of v with the highest occurrence count.
// NOTE(review): the declarations of w and maxCount, the count increment and
// the return are on lines missing from this listing; w is used as a
// map<T, int> keyed by element value.
309 template <
typename T>
310 T drwn::mode (
const vector<T>& v)
312 DRWN_ASSERT(v.size() > 0);
// Iterator to the current best candidate, starting at the first element.
315 typename vector<T>::const_iterator modeElement = v.begin();
316 for (
typename vector<T>::const_iterator it = v.begin(); it != v.end(); it++) {
// Look up the running count for this value...
317 typename map<T, int>::iterator jt = w.find(*it);
// ...and create an entry with count 0 (presumably only when find() missed;
// the guarding condition is not visible).
319 jt = w.insert(w.end(), make_pair(*it, 0));
// Strict comparison against the best count seen so far.
324 if (jt->second > maxCount) {
// Variance of the elements of v, normalised by the element count.
// NOTE(review): the definitions of mu and sum and the accumulation statement
// are on lines missing from this listing; presumably mu is the mean and sum
// collects squared deviations.
332 template <
typename T>
333 T drwn::variance(
const vector<T> & v)
335 DRWN_ASSERT(v.size() > 0);
340 for (
typename vector<T>::const_iterator i = v.begin(), last = v.end(); i != last; ++i) {
// The deviation is held in a double regardless of T.
341 double dev = *i - mu;
// Divides by N (not N - 1).
345 return sum / T(v.size());
348 template <
typename T>
349 T drwn::stdev(
const vector<T> &v)
351 T std2 = variance(v);
352 return (std2 > 0.0 ? sqrt(std2) : 0.0);
// Index of the smallest element of v.
// NOTE(review): the declaration of minIndx, the switch header, the default
// branch, the loop body and the return are on lines missing from this listing.
355 template <
typename T>
356 int drwn::argmin(
const vector<T> & v)
// Small sizes are answered directly; an empty vector gives -1.
361 case 0: minIndx = -1;
break;
362 case 1: minIndx = 0;
break;
// `<=` keeps index 0 when the two elements are equal.
363 case 2: minIndx = (v[0] <= v[1]) ? 0 : 1;
break;
// General case: scan from the second element; the strict `<` means a later
// equal element does not displace the current choice.
367 for (
int i = 1; i < (int)v.size(); i++) {
368 if (v[i] < v[minIndx]) {
// Computes argmin for every inner vector of v.
// NOTE(review): the return statement is on a line missing from this listing;
// presumably minIndx is returned.
378 template <
typename T>
379 vector<int> drwn::argmins(
const vector<vector<T> >& v)
// One slot per inner vector, pre-filled with -1.
381 vector<int> minIndx(v.size(), -1);
382 for (
int i = 0; i < (int)v.size(); i++) {
383 minIndx[i] = argmin(v[i]);
// Index of the largest element of v.
// NOTE(review): the declaration of maxIndx, the switch header, the default
// branch, the loop body and the return are on lines missing from this listing.
389 template <
typename T>
390 int drwn::argmax(
const vector<T> & v)
// Small sizes are answered directly; an empty vector gives -1.
395 case 0: maxIndx = -1;
break;
396 case 1: maxIndx = 0;
break;
// `>=` keeps index 0 when the two elements are equal.
397 case 2: maxIndx = (v[0] >= v[1]) ? 0 : 1;
break;
// General case: scan from the second element; the strict `>` means a later
// equal element does not displace the current choice.
401 for (
int i = 1; i < (int)v.size(); i++) {
402 if (v[i] > v[maxIndx]) {
// Computes argmax for every inner vector of v.
// NOTE(review): the return statement is on a line missing from this listing;
// presumably maxIndx is returned.
412 template <
typename T>
413 vector<int> drwn::argmaxs(
const vector<vector<T> >& v)
// One slot per inner vector, pre-filled with -1.
415 vector<int> maxIndx(v.size(), -1);
416 for (
int i = 0; i < (int)v.size(); i++) {
417 maxIndx[i] = argmax(v[i]);
// Excess kurtosis of v: the mean fourth power of the deviations divided by
// the squared variance, minus 3.0.
// NOTE(review): the definitions of mu and sum are on lines missing from this
// listing; presumably mu is mean(v) and sum starts at zero.
423 template <
typename T>
424 T drwn::excessKurtosis(
const vector<T> & v)
426 DRWN_ASSERT(!v.empty());
429 T sigma_squared = variance(v);
432 for (
typename vector<T>::const_iterator i = v.begin(), last = v.end(); i != last; ++i) {
// Fourth power obtained by squaring the squared deviation.
433 double dev = *i - mu;
434 double sqDev = dev * dev;
435 sum += sqDev * sqDev;
// No guard is visible for sigma_squared == 0, in which case the denominator
// is zero.
438 return sum / ( T(v.size() * sigma_squared * sigma_squared)) - 3.0;
// Produces one float per element of v.
// NOTE(review): the declarations of rval and sum, the inner-loop body and the
// return are on lines missing from this listing; presumably sum counts the
// elements v[j] that compare below (or not above) v[i].
441 template <
typename T>
442 vector<float> drwn::percentiles(
const vector<T> &v)
// Both loops run over the whole vector, so the inner body executes
// v.size() * v.size() times. The int indices are compared against the
// unsigned v.size().
446 for (
int i = 0; i < v.size(); i++) {
448 for (
int j = 0; j < v.size(); j++) {
// The count is stored as a fraction of the vector length.
452 rval.push_back(
float(sum)/
float(v.size()));
// Returns (minimum, maximum) of the elements of v as a pair.
// NOTE(review): the remainder of the for-header is on a line missing from
// this listing.
457 template <
typename T>
458 pair<T, T> drwn::range(
const vector<T>& v)
460 DRWN_ASSERT(v.size() > 0);
// Both extremes are tracked as iterators, starting at the first element.
462 typename vector<T>::const_iterator minObj(v.begin());
463 typename vector<T>::const_iterator maxObj(v.begin());
464 for (
typename vector<T>::const_iterator i = v.begin() + 1;
// Strict comparisons: the first occurrence of an extreme value is kept.
466 if (*i < *minObj) minObj = i;
467 if (*i > *maxObj) maxObj = i;
470 return make_pair(*minObj, *maxObj);
// Returns the overall (minimum, maximum) across every inner vector of v.
// NOTE(review): the remainder of the for-header, the update of r.first and
// the return are on lines missing from this listing.
473 template <
typename T>
474 pair<T, T> drwn::range(
const vector<vector<T> >& v)
476 DRWN_ASSERT(v.size() > 0);
// Seed with the range of the first inner vector. The single-vector overload
// asserts on empty input, so each inner vector must be non-empty too.
478 pair<T, T> r = range(*v.begin());
479 for (
typename vector<vector<T> >::const_iterator i = v.begin() + 1;
// Widen the running range with this inner vector's range.
481 pair<T, T> ri = range(*i);
482 if (ri.first < r.first)
484 if (ri.second > r.second)
485 r.second = ri.second;
// Builds a vector from v driven by the indices in indx.
// NOTE(review): the declaration of w, the loop body and the return are on
// lines missing from this listing; presumably v[*it] is appended to w for each
// index, with or without a bounds check.
491 template <
typename T>
492 vector<T> drwn::extractSubVector(
const vector<T>& v,
const vector<int>& indx)
// Reserve up front so appending one element per index does not reallocate.
496 w.reserve(indx.size());
497 for (vector<int>::const_iterator it = indx.begin(); it != indx.end(); ++it) {
// Keeps keepSize elements of v, chosen from the middle of the ordering
// induced by scores, so the lowest- and highest-scoring elements are dropped.
// NOTE(review): the body of the early exit and the final return are on lines
// missing from this listing; presumably v is returned unchanged when keepSize
// covers the whole vector, and w is returned otherwise.
504 template <
typename T>
505 vector<T> drwn::removeOutliers(
const vector<T>& v,
506 const vector<double>& scores,
int keepSize)
// One score per element is required.
508 DRWN_ASSERT(scores.size() == v.size());
509 if (keepSize >= (
int)v.size()) {
// Pair each score with its original index so that sorting by score still
// lets us find the matching element of v.
514 vector<pair<double, int> > indx(v.size());
515 for (
unsigned i = 0; i < v.size(); i++) {
516 indx[i] = make_pair(scores[i], i);
518 sort(indx.begin(), indx.end());
// Take a window of keepSize entries centred in the sorted order.
520 vector<T> w(keepSize);
521 unsigned startIndx = (v.size() - keepSize) / 2;
522 unsigned endIndx = startIndx + keepSize;
523 for (
unsigned i = startIndx; i < endIndx; i++) {
524 w[i - startIndx] = v[indx[i].second];
// Builds the set of subsets of s recursively.
// NOTE(review): the declaration of result, the construction of smallS, the
// inner-loop body and the return are on lines missing from this listing;
// presumably smallS is s with the current element removed, and the inner loop
// adds that element back to each subset of smallS.
530 template <
typename T>
531 set<set<T> > drwn::powerset(
const set<T>& s)
// The empty set is always a member of the result.
536 result.insert(set<T>());
538 for (
typename set<T>::const_iterator it = s.begin(); it != s.end(); ++it) {
// Recurse on the smaller set and merge all of its subsets into the result.
546 set<set<T> > smallP = powerset(smallS);
547 result.insert(smallP.begin(), smallP.end());
551 for (
typename set<set<T> >::const_iterator jt = smallP.begin();
552 jt != smallP.end(); ++jt) {
// Scale factor 2^20 / ln(2) (1048576 == 2^20). M_LN2 is only expanded where
// EXP_A is used, so defining it after EXP_A is fine.
567 #define EXP_A (1048576.0 / M_LN2)
// Value of ln(2).
// NOTE(review): presumably a fallback wrapped in an #ifndef that is not
// visible in this listing.
571 #define M_LN2 0.69314718055994530942
// Fast approximation of exp(y).
// NOTE(review): the enclosing union/variable _eco, the preprocessor
// conditionals, the definition of EXP_C and the return are on lines missing
// from this listing. This looks like the bit-manipulation approximation that
// writes the high word of an IEEE-754 double -- confirm against the full
// source.
574 inline double drwn::fastexp(
double y)
// Very negative arguments are returned as 0.0 outright.
576 if (y < -700.0)
return 0.0;
// Two layouts with the int fields in opposite order; presumably one is picked
// by an endianness check that is not visible here.
582 struct {
int j, i; } n;
584 struct {
int i, j; } n;
// 1072693248 == 1023 * 2^20; the scaled argument plus this offset (less
// EXP_C) is stored in field i.
587 _eco.n.i = (int)(EXP_A * (y)) + (1072693248 - EXP_C);
// Randomly reorders v in place using rand().
// NOTE(review): one source line between the declaration of n and the loop is
// missing from this listing. Unless it guards against an empty vector, n - 1
// wraps around for n == 0 because n is unsigned -- confirm against the full
// source.
592 template <
typename T>
593 void drwn::shuffle(vector<T>& v)
595 const size_t n = v.size();
597 for (
size_t i = 0; i < n - 1; i++) {
// Pick an offset j in [0, n - i) and swap position i with position i + j,
// i.e. with a random element of the not-yet-fixed tail (possibly itself).
598 size_t j = rand() % (n - i);
599 std::swap(v[i], v[i + j]);
// Returns a random selection of n elements from v.
// NOTE(review): the declaration of w and the final return are on lines
// missing from this listing; presumably w is a copy of v that is truncated to
// its first n entries after the loop.
603 template <
typename T>
604 vector<T> drwn::subSample(
const vector<T>& v,
size_t n)
// Asking for at least as many elements as exist returns v unchanged.
606 if (n >= v.size())
return v;
607 if (n == 0)
return vector<T>();
// Partial shuffle: only the first n positions are filled, each with a random
// element from the remaining tail.
613 for (
size_t i = 0; i < n; i++) {
614 size_t j = rand() % (w.size() - i);
615 std::swap(w[i], w[i + j]);
// Huber-style penalty of x with threshold m.
// NOTE(review): the return for -m <= x <= m is on a line missing from this
// listing; presumably it is x * x, which would match the linear branches at
// |x| == m.
623 inline double drwn::huberFunction(
double x,
double m)
// Beyond the threshold the penalty is linear: m * (2|x| - m).
625 if (x < -m)
return (m * (-2.0 * x - m));
626 if (x > m)
return (m * (2.0 * x - m));
// Derivative of the Huber-style penalty with respect to x.
// NOTE(review): the return for -m <= x <= m is on a line missing from this
// listing; presumably it is 2.0 * x.
631 inline double drwn::huberDerivative(
double x,
double m)
// Beyond the threshold the slope is constant at +/- 2m.
633 if (x < -m)
return -2.0 * m;
634 if (x > m)
return 2.0 * m;
// Returns the Huber-style penalty of x with threshold m.
// NOTE(review): the branch conditions, every store through df and the return
// for the middle region are on lines missing from this listing; presumably df
// receives the derivative and the branches mirror huberFunction.
639 inline double drwn::huberFunctionAndDerivative(
double x,
double *df,
double m)
// Linear-region values, identical to the expressions in huberFunction.
643 return (m * (-2.0 * x - m));
646 return (m * (2.0 * x - m));
// Rounds n up to the nearest multiple of d that is not smaller than n.
//   n  value to round; may be negative
//   d  step size; must be non-zero (n % d is undefined for d == 0) and is
//      expected to be positive
// Returns n unchanged when it is already a multiple of d.
inline int drwn::roundUp(int n, int d) {
    const int rem = n % d;
    if (rem == 0) {
        return n;
    }
    // In C++ the sign of n % d follows n, so a negative n gives a negative
    // remainder. Subtracting that remainder already moves n up to the next
    // multiple; adding d as well (as is right for positive n) would overshoot
    // by one whole step.
    return (rem > 0) ? n + d - rem : n - rem;
}
void drwnInitializeRand()
initialize the standard C library random number generator with a time-of-day seed ...
Definition: drwnStatsUtils.cpp:33