Implements a cacheable dataset containing feature vectors, labels and optional weights.
More...
|
|
| drwnDataset () |
| | default constructor
|
| |
|
| drwnDataset (const drwnDataset< XType, YType, WType > &d) |
| | copy constructor
|
| |
|
| drwnDataset (const char *filename) |
| | construct and load dataset from file
|
| |
|
bool | empty () const |
| | return true if the dataset is empty
|
| |
|
int | size () const |
| | return the number of samples in the dataset
|
| |
|
bool | hasWeights () const |
| | return true if the dataset contains weighted samples
|
| |
|
bool | hasIndexes () const |
| | return true if the dataset has external indices associated with each sample
|
| |
|
bool | valid () const |
| | return true if the dataset is valid (e.g., number of targets equals number of feature vectors)
|
| |
|
int | count (const YType &label) const |
| | returns the number of samples with a given target label
|
| |
|
void | reserve (int reserveSize) |
| | pre-allocate memory for storing samples (feature vectors and targets)
|
| |
|
int | numFeatures () const |
| | returns the number of features in the feature vector
|
| |
|
YType | minTarget () const |
| | returns the minimum target value in the dataset
|
| |
|
YType | maxTarget () const |
| | returns the maximum target value in the dataset
|
| |
|
void | clear () |
| | clears all data in the dataset
|
| |
|
int | write (const char *filename, bool bAppend=false) const |
| | writes the current dataset to disk (optionally appending to an existing dataset)
|
| |
|
int | write (const char *filename, int startIndx, int endIndx, bool bAppend=false) const |
| | writes a range of samples to disk (optionally appending to an existing dataset)
|
| |
|
int | read (const char *filename, bool bAppend=false) |
| | reads a dataset from disk (optionally appending to the current dataset)
|
| |
|
int | read (const char *filename, int startIndx, int endIndx, bool bAppend=false) |
| | reads a range of samples from disk (optionally appending to the current dataset)
|
| |
|
int | append (const drwnDataset< XType, YType, WType > &d) |
| | appends the samples from another dataset to this dataset
|
| |
|
int | append (const vector< XType > &x, const YType &y) |
| | appends a sample (feature vector and target) to the dataset
|
| |
|
int | append (const vector< XType > &x, const YType &y, const WType &w) |
| | appends a weighted sample (feature vector and target) to the dataset
|
| |
|
int | append (const vector< XType > &x, const YType &y, const WType &w, int indx) |
| | appends a weighted sample with associated external index to the dataset
|
| |
| int | subSample (int sampleRate, bool bBalanced=false) |
| | subsample a dataset (balanced is only valid for discrete target types); if bBalanced is true then sampleRate is applied to the most abundant target More...
|
| |
template<typename XType, typename YType, typename WType>
class drwnDataset< XType, YType, WType >
Implements a cacheable dataset containing feature vectors, labels and optional weights.
The dataset can be used by various machine learning algorithms. The dataset is stored on disk as:
<version, flags (int32)> <num features (int32)>
<label> <feature 1> ... <feature n> (<weight>)? (<index>)?
...
- Warning: The API for drwnDataset is not stable.