21#include "third_party/tinyxml2/tinyxml2.h"
26const src_problem::default_symbols_t src_problem::default_symbols = {};
40std::set<std::vector<C>> seq_with_rep(
const std::set<C> &availables,
43 Expects(availables.size());
46 std::set<std::vector<C>> ret;
48 std::function<void (std::size_t, std::vector<C>)> swr;
49 swr = [&](std::size_t left, std::vector<C> current)
57 for (
auto elem : availables)
59 current.push_back(elem);
60 swr(left - 1, current);
99 vitaINFO <<
"Reading dataset " << ds <<
"...";
100 data(dataset_t::training).read(ds);
102 vitaINFO <<
"...dataset read. Examples: " <<
data(dataset_t::training).
size()
125 vitaINFO <<
"Reading dataset from input stream...";
126 data(dataset_t::training).read_csv(ds);
128 vitaINFO <<
"...dataset read. Examples: " <<
data(dataset_t::training).
size()
160 const std::filesystem::path &symbols,
typing t)
163 vitaINFO <<
"Reading dataset " << ds <<
"...";
164 data(dataset_t::training).read(ds);
166 vitaINFO <<
"....dataset read. Examples: " <<
data(dataset_t::training).
size()
196 vitaINFO <<
"Setting up terminals...";
198 const auto &columns(training_.columns);
199 if (columns.size() < 2)
205 for (std::size_t i(1); i < columns.size(); ++i)
208 const auto provided_name(columns[i].name);
209 const auto name(provided_name.empty() ?
"X" + std::to_string(i)
213 if (insert<variable>(name,
static_cast<unsigned>(i - 1), category))
217 for (
const auto &s : columns[i].states)
218 switch (columns[i].domain)
221 insert<constant<D_DOUBLE>>(std::get<D_DOUBLE>(s), category);
224 insert<constant<D_INT>>(std::get<D_INT>(s), category);
227 insert<constant<D_STRING>>(std::get<D_STRING>(s), category);
240 vitaINFO <<
"...terminals ready. Variables:" <<
variables;
281 return file.empty() ? setup_symbols_impl() : setup_symbols_impl(file);
292std::size_t src_problem::setup_symbols_impl()
294 vitaINFO <<
"Setting up default symbol set...";
297 const auto used_categories(
categories.used_categories());
298 std::size_t inserted(0);
300 for (
const auto &category : used_categories)
301 if (compatible({category}, {
"numeric"},
categories))
303 const std::vector<std::string>
base(
304 {
"1.0",
"2.0",
"3.0",
"4.0",
"5.0",
"6.0",
"7.0",
"8.0",
"9.0",
305 "FABS",
"FADD",
"FDIV",
"FLN",
"FMUL",
"FMOD",
"FSUB"});
307 for (
const auto &s:
base)
308 if (sset.
insert(factory_.
make(s, {category})))
311 else if (compatible({category}, {
"string"},
categories))
313 if (sset.
insert(factory_.
make(
"SIFE", {category, 0})))
317 vitaINFO <<
"...default symbol set ready. Symbols: " << inserted;
329std::size_t src_problem::setup_symbols_impl(
const std::filesystem::path &file)
331 vitaINFO <<
"Reading symbol set " << file <<
"...";
332 tinyxml2::XMLDocument doc;
333 if (doc.LoadFile(file.string().c_str()) != tinyxml2::XML_SUCCESS)
334 throw exception::data_format(
"Symbol set format error");
337 const auto used_categories(
categories.used_categories());
338 std::size_t parsed(0);
342 tinyxml2::XMLHandle handle(&doc);
343 auto *symbolset(handle.FirstChildElement(
"symbolset").ToElement());
346 throw exception::data_format(
"Empty symbol set");
348 for (
auto *s(symbolset->FirstChildElement(
"symbol"));
350 s = s->NextSiblingElement(
"symbol"))
352 if (!s->Attribute(
"name"))
354 vitaERROR <<
"Skipped unnamed symbol in symbolset";
357 const std::string sym_name(s->Attribute(
"name"));
359 if (
const char *sym_sig = s->Attribute(
"signature"))
361 for (
auto category : used_categories)
362 if (compatible({category}, {std::string(sym_sig)},
categories))
364 const auto n_args(factory_.
args(sym_name));
365 std::string signature(sym_name +
":");
367 for (std::size_t j(0); j < n_args; ++j)
368 signature +=
" " + std::to_string(category);
369 vitaDEBUG <<
"Adding to symbol set " << signature;
371 sset.
insert(factory_.
make(sym_name, cvect(n_args, category)));
376 auto *sig(s->FirstChildElement(
"signature"));
379 vitaERROR <<
"Skipping " << sym_name <<
" symbol (empty signature)";
383 std::vector<std::string> args;
384 for (
auto *arg(sig->FirstChildElement(
"arg"));
386 arg = arg->NextSiblingElement(
"arg"))
390 vitaERROR <<
"Skipping " << sym_name <<
" symbol (wrong signature)";
395 args.push_back(arg->GetText());
400 const auto sequences(detail::seq_with_rep(used_categories, args.size()));
404 for (
const auto &seq : sequences)
407 std::string signature(sym_name +
":");
408 for (
const auto &j : seq)
409 signature +=
" " + std::to_string(j);
410 vitaDEBUG <<
"Adding to symbol set " << signature;
419 vitaINFO <<
"...symbol set read. Symbols: " << parsed;
440bool src_problem::compatible(
const cvect &instance,
441 const std::vector<std::string> &pattern,
442 const category_set &categories)
const
444 Expects(instance.size() == pattern.size());
446 const auto sup(instance.size());
447 for (std::size_t i(0); i <
sup; ++i)
449 const std::string p_i(pattern[i]);
450 const bool generic(
from_weka(p_i) != d_void);
459 if (instance[i] !=
categories.column(p_i).category)
472 return static_cast<unsigned>(sset.
categories());
481 return static_cast<unsigned>(training_.
classes());
499 return t == dataset_t::training ? training_ : validation_;
508 return t == dataset_t::training ? training_ : validation_;
Information about the set of categories used in a specific problem.
A 2-dimensional labeled data structure with columns of potentially different types.
unsigned variables() const
Aggregates the problem-related data needed by an evolutionary program.
virtual bool is_valid() const
Provides a GP-specific interface to the generic problem class.
unsigned categories() const
std::size_t setup_symbols(typing=typing::weak)
Sets up the symbol set.
const dataframe & data(dataset_t=dataset_t::training) const
void setup_terminals(typing)
Inserts variables and states for nominal attributes into the symbol_set.
unsigned variables() const
bool is_valid() const override
src_problem()
New empty instance of src_problem.
std::size_t args(const std::string &) const
std::unique_ptr< symbol > make(const std::string &, cvect={0})
Creates a specific instance of a symbol.
void clear()
Clears the current symbol set.
bool enough_terminals() const
We want at least one terminal for every used category.
symbol * insert(std::unique_ptr< symbol >, double=1.0)
Adds a new symbol to the set.
category_t categories() const
base_t base(const value_t &v)
A simple shortcut for casting a value_t to base_t.
The main namespace for the project.
domain_t from_weka(const std::string &n)
dataset_t
Data/simulations are categorised in three sets:
typing
Category/type management of the dataframe columns.
std::size_t category_t
A category provides operations which supplement or supersede those of the domain but which are restric...