Implements a cacheable dataset containing feature vectors, labels and optional weights.
More...
|
| drwnDataset () |
| default constructor
|
|
| drwnDataset (const drwnDataset< XType, YType, WType > &d) |
| copy constructor
|
|
| drwnDataset (const char *filename) |
| construct and load dataset from file
|
|
bool | empty () const |
| return true if the dataset is empty
|
|
int | size () const |
| return the number of samples in the dataset
|
|
bool | hasWeights () const |
| return true if the dataset contains weighted samples
|
|
bool | hasIndexes () const |
| return true if the dataset has external indices associated with each sample
|
|
bool | valid () const |
| return true if the dataset is valid (e.g., number of targets equals number of feature vectors)
|
|
int | count (const YType &label) const |
| returns the number of samples with a given target label
|
|
void | reserve (int reserveSize) |
| pre-allocate memory for storing samples (feature vectors and targets)
|
|
int | numFeatures () const |
| returns the number of features in the feature vector
|
|
YType | minTarget () const |
| returns the minimum target value in the dataset
|
|
YType | maxTarget () const |
| returns the maximum target value in the dataset
|
|
void | clear () |
| clears all data in the dataset
|
|
int | write (const char *filename, bool bAppend=false) const |
| writes the current dataset to disk (optionally appending to an existing dataset)
|
|
int | write (const char *filename, int startIndx, int endIndx, bool bAppend=false) const |
| writes a range of samples to disk (optionally appending to an existing dataset)
|
|
int | read (const char *filename, bool bAppend=false) |
| reads a dataset from disk (optionally appending to the current dataset)
|
|
int | read (const char *filename, int startIndx, int endIndx, bool bAppend=false) |
| reads a range of samples from disk (optionally appending to the current dataset)
|
|
int | append (const drwnDataset< XType, YType, WType > &d) |
| appends the samples from another dataset to this dataset
|
|
int | append (const vector< XType > &x, const YType &y) |
| appends a sample (feature vector and target) to the dataset
|
|
int | append (const vector< XType > &x, const YType &y, const WType &w) |
| appends a weighted sample (feature vector and target) to the dataset
|
|
int | append (const vector< XType > &x, const YType &y, const WType &w, int indx) |
| appends a weighted sample with associated external index to the dataset
|
|
int | subSample (int sampleRate, bool bBalanced=false) |
| subsample a dataset (balanced is only valid for discrete target types); if bBalanced is true then sampleRate is applied to the most abundant target. More...
|
|
template<typename XType, typename YType, typename WType>
class drwnDataset< XType, YType, WType >
Implements a cacheable dataset containing feature vectors, labels and optional weights.
The dataset can be used by various machine learning algorithms. The dataset is stored on disk as:
<version, flags (int32)> <num features (int32)>
<label> <feature 1> ... <feature n> (<weight>)? (<index>)?
...
- Warning: The API for drwnDataset is not stable.