13#if !defined(VITA_DATAFRAME_H)
14#define VITA_DATAFRAME_H
26#include "utility/pocket_csv.h"
// Container type for the rows of the dataset: a std::vector of `example`.
55 using examples_t = std::vector<example>;
// Element type of that container (i.e. `example`).
56 using value_type = examples_t::value_type;
// Textual name, empty by default (presumably the column's name - TODO confirm
// against the enclosing struct, which is not visible in this excerpt).
74 std::string name = {};
// Ordered set of `value_t`, empty by default (presumably the distinct values
// observed for the column - TODO confirm).
76 std::set<value_t> states = {};
// Unsigned type used for indexes and element counts.
79 using size_type = std::size_t;
83 const column_info &operator[](size_type i)
const {
return cols_[i]; }
84 column_info &operator[](size_type i) {
return cols_[i]; }
86 size_type size()
const {
return cols_.size(); }
87 bool empty()
const {
return cols_.empty(); }
89 auto begin()
const {
return cols_.begin(); }
90 auto begin() {
return cols_.begin(); }
91 auto end()
const {
return cols_.end(); }
92 auto end() {
return cols_.end(); }
94 const auto &front()
const {
return cols_.front(); }
95 auto &front() {
return cols_.front(); }
97 const auto &back()
const {
return cols_.back(); }
98 auto &back() {
return cols_.back(); }
100 void pop_back() { cols_.pop_back(); }
// Backing storage: one `column_info` per column, kept in a std::vector.
109 std::vector<column_info> cols_;
// Constructors taking a data source. What is done with the source (presumably
// the examples are loaded right away) is defined in the implementation file,
// which is not visible here - TODO confirm.
// From an input stream plus a `params` object.
115 dataframe(std::istream &,
const params &);
// From a filesystem path only; `explicit` forbids implicit
// path -> dataframe conversions.
116 explicit dataframe(
const std::filesystem::path &);
// From a filesystem path plus a `params` object.
117 dataframe(
const std::filesystem::path &,
const params &);
// Iterator-related aliases, all taken from `examples_t`.
120 using iterator = examples_t::iterator;
121 using const_iterator = examples_t::const_iterator;
122 using difference_type = examples_t::difference_type;
// Read-only iteration (const overload returning a `const_iterator`).
125 const_iterator
begin()
const;
// Const overload returning a `const_iterator`; presumably the past-the-end
// position matching `begin()` - TODO confirm.
127 const_iterator
end()
const;
// NOTE: declared as returning `value_type` by value, not by reference.
129 value_type
front()
const;
// Range erase: takes two iterators and returns an iterator. Presumably follows
// the `std::vector::erase(first, last)` contract - TODO confirm in the
// implementation file.
134 iterator
erase(iterator, iterator);
// Loading functions. Each returns a std::size_t (presumably the number of
// examples read - TODO confirm in the implementation file).
// Every function comes in two flavours: without and with a `params` argument.
// Load from a filesystem path.
137 std::size_t read(
const std::filesystem::path &);
138 std::size_t read(
const std::filesystem::path &,
const params &);
// Load from an input stream, judging by the name in CSV format.
139 std::size_t read_csv(std::istream &);
// NOTE: unlike the sibling overloads, this one takes `params` BY VALUE.
140 std::size_t read_csv(std::istream &, params);
// Load from an input stream, judging by the name in XRFF format.
141 std::size_t read_xrff(std::istream &);
142 std::size_t read_xrff(std::istream &,
const params &);
// Const query returning a std::size_t (presumably the number of stored
// examples - TODO confirm).
147 std::size_t
size()
const;
// Public data member of type `columns_info` (the per-column descriptors).
157 columns_info columns;
// Helper declarations used while loading data. The access level and the
// meaning of the `bool` arguments are not visible in this excerpt.
// Processes one `record_t`; the meaning of the boolean result is defined in
// the implementation file.
160 bool read_record(
const record_t &,
bool);
// Builds an `example` out of a `record_t`.
161 example to_example(
const record_t &,
bool);
// Maps a string to a `class_t` (presumably via `classes_map_` - TODO confirm).
163 class_t encode(
const std::string &);
// Path-based counterparts of the stream-based readers; same return type.
165 std::size_t read_csv(
const std::filesystem::path &,
const params &);
166 std::size_t read_xrff(
const std::filesystem::path &,
const params &);
// Variant working on a tinyxml2 document, passed by non-const reference.
167 std::size_t read_xrff(tinyxml2::XMLDocument &,
const params &);
// Ordered map from a string key to a `class_t` value (presumably class name ->
// class ID - TODO confirm against `encode()`).
172 std::map<std::string, class_t> classes_map_;
// Unsigned counter, zero by default. Its exact semantics are not visible in
// this excerpt.
203 std::uintmax_t difficulty = 0;
206 void clear() { *
this =
example(); }
220 return lexical_cast<T>(e.
output);
233 Expects(std::holds_alternative<D_INT>(e.
output));
234 return std::get<D_INT>(e.
output);
241 {
dialect.has_header = pocket_csv::dialect::HAS_HEADER;
return *
this; }
243 {
dialect.has_header = pocket_csv::dialect::NO_HEADER;
return *
this; }
Information about the collection of columns (type, name, output index).
columns_info()
Constructs a new empty columns_info object.
void push_front(const column_info &)
Adds a new column at the front of the column list.
void build(const record_t &, bool)
Given an example, compiles information about the columns of the dataframe.
void push_back(const column_info &)
Adds a new column at the end of the column list.
std::optional< std::size_t > output_index
Index of the column containing the output value (label).
filter_hook_t filter
A filter and transform function applied when reading data.
pocket_csv::dialect dialect
A 2-dimensional labeled data structure with columns of potentially different types.
void push_back(const example &)
Appends the given element to the end of the active dataset.
std::string class_name(class_t) const
unsigned variables() const
std::vector< std::string > record_t
Raw input record.
iterator erase(iterator, iterator)
Removes specified elements from the dataframe.
value_type front() const
Returns a copy of the first element in the dataframe.
std::function< bool(record_t &)> filter_hook_t
A filter and transform function (returns true for records that should be loaded and,...
void clear()
Removes all elements from the container.
dataframe()
New empty data instance.
The main namespace for the project.
class_t label(const dataframe::example &e)
Gets the class_t ID (aka label) for a given example.
domain_t from_weka(const std::string &n)
std::size_t class_t
The type used as class ID in classification tasks.
domain_t
In an environment where a symbol such as '+' may have many different meanings, it's useful to specify...
T label_as(const dataframe::example &e)
Get the output value for a given example.
std::variant< D_VOID, D_INT, D_DOUBLE, D_STRING > value_t
A variant containing the data types used by the interpreter for internal calculations / output value ...
Information about a single column of the dataset.
Stores a single element (row) of the dataset.
std::vector< value_t > input
The thing about which we want to make a prediction (aka instance).
value_t output
The answer for the prediction task: either the answer produced by the machine learning system,...