22auto weight(
const dataframe::example &v)
24 return static_cast<std::uintmax_t
>(v.difficulty)
25 +
static_cast<std::uintmax_t
>(v.age) * v.age * v.age;
// Constructor member-initializer list (the constructor header and body are
// not part of this excerpt).
// training_ and validation_ are initialised from prob.data(...) for the
// training and validation datasets respectively; eva_t_ and eva_v_ are
// initialised from the eva_t and eva_v arguments.
42 : training_(prob.data(
dataset_t::training)),
43 validation_(prob.data(
dataset_t::validation)),
44 eva_t_(eva_t), eva_v_(eva_v),
// Resets bookkeeping fields of the examples stored in the dataframe `d`.
// Only one statement of the body is visible in this excerpt: it sets the
// difficulty of an example to 0.
// NOTE(review): the function name suggests the age field is reset as well, on
// a line missing from this excerpt - confirm against the full source.
51void dss::reset_age_difficulty(
dataframe &d)
56 example.difficulty = 0;
// Aggregates age and difficulty over the examples of a dataframe `d` and
// returns them as a pair: `.first` relates to age, `.second` to difficulty.
// The visible fold starts from (0, 0) and adds e.age to the first component
// and e.difficulty to the second one for every example e.
// NOTE(review): the use of the element count `s` is on lines missing from
// this excerpt; presumably the sums are divided by `s` to obtain averages
// (and an empty dataframe is special-cased) - confirm.
61std::pair<std::uintmax_t, std::uintmax_t> dss::average_age_difficulty(
64 constexpr std::pair<std::uintmax_t, std::uintmax_t> zero(0, 0);
66 const auto s(d.size());
70 auto avg(std::accumulate(d.begin(), d.end(), zero,
71 [](
const auto &p,
const dataframe::example &e)
73 return std::pair<std::uintmax_t, std::uintmax_t>(
74 p.first + e.age, p.second + e.difficulty);
// The body of this member is not part of this excerpt.
// NOTE(review): presumably clears the values cached by eva_t_ and eva_v_ -
// confirm against the full source.
83void dss::clear_evaluators()
// Appends every example of training_ to the back of validation_ using the
// std::move algorithm with a back inserter.
// Postcondition (Ensures): training_ is empty.
// NOTE(review): the std::move algorithm leaves the moved-from elements in the
// source range, so the statement that actually empties training_ presumably
// sits on a line missing from this excerpt - confirm.
89void dss::move_to_validation()
91 std::move(training_.
begin(), training_.
end(),
92 std::back_inserter(validation_));
95 Ensures(training_.
empty());
// Fragment of a member function whose header is not part of this excerpt
// (NOTE(review): presumably dss::init - confirm).
// Precondition (Expects): env_.dss holds a value greater than 0.
106 Expects(env_.
dss.value_or(0) > 0);
// Age/difficulty bookkeeping is reset on both the training and validation sets.
108 reset_age_difficulty(training_);
109 reset_age_difficulty(validation_);
// Rebuilds the training set: all examples are first pooled into validation_,
// then each one is drawn for training with a probability proportional to
// weight(e), capped at 1.
// Precondition (Expects): training_ and validation_ hold at least 2 examples
// in total.
// Postcondition (Ensures): neither training_ nor validation_ is empty.
// Several lines of the body (lambda headers, the declaration of `pivot`,
// comments) are missing from this excerpt.
115void dss::shake_impl()
117 Expects(training_.
size() + validation_.
size() >= 2);
// Pool everything into validation_; the sampling below works on that range.
119 move_to_validation();
121 const auto avg_v(average_age_difficulty(validation_));
122 vitaDEBUG <<
"DSS average validation difficulty " << avg_v.second
123 <<
", age " << avg_v.first;
// Sum of weight(e) over the pooled examples, accumulated as std::uintmax_t.
125 const auto weight_sum(
126 std::accumulate(validation_.
begin(), validation_.
end(), std::uintmax_t(0),
129 return s + weight(e);
// `s` is the pooled size as a double. The target fraction is
// 0.2 + 100 / (s + 100), capped at 0.6: it equals 0.6 for s <= 150 and
// approaches 0.2 as s grows. target_size is never below 1.
139 const auto s(
static_cast<double>(validation_.
size()));
140 const double ratio(std::min(0.6, 0.2 + 100.0 / (s + 100.0)));
141 assert(0.2 <= ratio && ratio <= 0.6);
142 const double target_size(std::max(1.0, s * ratio));
143 assert(1.0 <= target_size && target_size <= s);
// Scale factor turning a weight into a selection probability.
// NOTE(review): a zero weight_sum would make this a division by zero; a guard
// may exist on the lines missing from this excerpt - confirm.
144 const double k(target_size /
static_cast<double>(weight_sum));
// std::partition puts the elements satisfying the predicate first. The
// predicate is true when the random draw FAILS, so the examples that were not
// drawn stay at the front and the drawn ones end up at the back of the range.
// The partition point is presumably stored in `pivot` (declaration not visible).
147 std::partition(validation_.
begin(), validation_.
end(),
150 const auto p1(static_cast<double>(weight(e)) * k);
151 const auto prob(std::min(p1, 1.0));
153 return random::boolean(prob) == false;
// Fallback when the draw selected everything (pivot at begin) or nothing
// (pivot at end): split at index target_size so that neither set is empty.
// NOTE(review): with this fallback training_ receives s - target_size
// examples, whereas the random draw selects about target_size of them on
// average - confirm this asymmetry is intended.
156 if (pivot == validation_.
begin() || pivot == validation_.
end())
157 pivot = std::next(validation_.
begin(),
158 static_cast<std::ptrdiff_t
>(target_size));
160 assert(validation_.
size() ==
static_cast<size_t>(s));
// Transfer [pivot, end) to training_, then drop that tail from validation_.
161 std::move(pivot, validation_.
end(), std::back_inserter(training_));
162 validation_.
erase(pivot, validation_.
end());
164 vitaDEBUG <<
"DSS SHAKE (weight sum: " << weight_sum <<
", training with: "
165 << training_.
size() <<
')';
// No example is lost or duplicated by the split.
166 assert(
static_cast<size_t>(s) == training_.
size() + validation_.
size());
// Bookkeeping is reset only for the examples just selected for training.
168 reset_age_difficulty(training_);
170 Ensures(!training_.
empty());
171 Ensures(!validation_.
empty());
// Fragment of a member function whose header is not part of this excerpt
// (NOTE(review): presumably dss::shake, given the `generation` variable -
// confirm).
// Precondition (Expects): env_.dss holds a value greater than 0.
176 Expects(env_.
dss.value_or(0) > 0);
// `gap` is the value stored in env_.dss; the precondition above guarantees
// that a value is present. How `gap` and `generation` are combined is on lines
// missing from this excerpt.
178 const auto gap(*env_.
dss);
183 assert(!training_.
empty());
184 assert(!validation_.
empty());
188 vitaDEBUG <<
"DSS shaking generation " << generation;
190 const auto avg_t(average_age_difficulty(training_));
191 vitaDEBUG <<
"DSS average training difficulty " << avg_t.second;
// The age component computed for the training set is expected to be exactly 1.
// NOTE(review): presumably because training examples get their age reset
// whenever they are selected - confirm.
192 assert(avg_t.first == 1);
// inc_age (defined on lines missing from this excerpt) is applied to every
// example of both sets.
195 std::for_each(training_.
begin(), training_.
end(), inc_age);
196 std::for_each(validation_.
begin(), validation_.
end(), inc_age);
// Fragment of a member function whose header is not part of this excerpt
// (NOTE(review): presumably dss::close - confirm).
// All training examples are handed back to the validation set.
209 move_to_validation();
virtual void clear()
Clear possible cached values.
A 2-dimensional labeled data structure with columns of potentially different types.
iterator erase(iterator, iterator)
Removes specified elements from the dataframe.
void clear()
Removes all elements from the container.
void close(unsigned) override
Moves all the examples into the validation set.
bool shake(unsigned) override
Changes the training environment.
void init(unsigned) override
Available examples are randomly partitioned into two independent sets according to a given percentage...
dss(src_problem &, cached_evaluator &, cached_evaluator &)
Sets up a DSS validator.
facultative< unsigned > dss
Enables Dynamic Subset Selection every dss generations.
Provides a GP-specific interface to the generic problem class.
The main namespace for the project.
dataset_t
Data/simulations are categorised in three sets:
Stores a single element (row) of the dataset.