25 : training_(prob.data(
dataset_t::training)),
26 validation_(prob.data(
dataset_t::validation)),
32 Ensures(validation_.
empty());
45 Expects(!training_.
empty());
49 vitaWARNING <<
"Holdout with 0% validation is unusual";
55 assert(validation_.
empty());
58 const auto available(training_.
size());
59 const auto skip(std::max<
decltype(available)>(
60 available * (100 - perc) / 100, 1));
61 assert(skip <= available);
64 for (std::size_t i(available - 1); i >= skip; --i)
66 auto curr(std::next(training_.
begin(), i));
67 auto rand(std::next(training_.
begin(), random::sup(i + 1)));
69 std::iter_swap(curr, rand);
72 const auto from(std::next(training_.
begin(), skip));
73 std::copy(from, training_.
end(), std::back_inserter(validation_));
74 training_.
erase(from, training_.
end());
76 Ensures(!training_.
empty());
77 Ensures(training_.
size() == skip);
78 Ensures(training_.
size() + validation_.
size() == available);
iterator erase(iterator, iterator)
Removes the specified elements from the dataframe.
facultative< unsigned > validation_percentage
How much data should be reserved for the validation set? validation_percentage is the fraction of the...
void init(unsigned) override
During the first run, examples are randomly partitioned into two sets according to a given percentage.
holdout_validation(src_problem &)
Sets up a hold-out validator.
Provides a GP-specific interface to the generic problem class.
The main namespace for the project.
dataset_t
Data/simulations are categorised into three sets:
value_t run(const T &ind)
A handy short-cut for one-time execution of an individual.