/**
 *  \remark This file is part of VITA.
 *
 *  \copyright Copyright (C) 2011-2023 EOS di Manlio Morini.
 *
 *  This Source Code Form is subject to the terms of the Mozilla Public
 *  License, v. 2.0. If a copy of the MPL was not distributed with this file,
 *  You can obtain one at http://mozilla.org/MPL/2.0/
 */
13#if !defined(VITA_SRC_EVALUATOR_H)
14# error "Don't include this file directly, include the specific .h instead"
17#if !defined(VITA_SRC_EVALUATOR_TCC)
18#define VITA_SRC_EVALUATOR_TCC
21/// \param[in] d dataset that the evaluator will use
23template<class T, class DAT>
24src_evaluator<T, DAT>::src_evaluator(DAT &d) : dat_(&d)
29/// \param[in] d the training dataset
31template<class T, class ERRF, class DAT>
32sum_of_errors_evaluator<T, ERRF, DAT>::sum_of_errors_evaluator(DAT &d)
33 : src_evaluator<T, DAT>(d)
38/// Sums the error reported by the error functor over a training set.
40/// \param[in] prg program (individual/team) used for fitness evaluation
41/// \param[in] step consider just `1` example every `step`
42/// \return the fitness (greater is better, max is `0`)
44template<class T, class ERRF, class DAT>
45fitness_t sum_of_errors_evaluator<T, ERRF, DAT>::sum_of_errors_impl(
46 const T &prg, unsigned step)
48 Expects(this->dat_->begin() != this->dat_->end());
49 Expects(!detail::classes(this->dat_));
51 const ERRF err_fctr(prg);
53 double average_error(0.0), n(0.0);
54 for (auto it(std::begin(*this->dat_));
55 std::distance(it, std::end(*this->dat_)) >= step;
56 std::advance(it, step))
58 const auto err(err_fctr(*it));
60 // User specified examples could not support difficulty.
61 if constexpr (detail::has_difficulty_v<DAT>)
65 average_error += (err - average_error) / ++n;
68 // Note that we take the average error: this way fast() and operator()
69 // outputs can be compared.
70 return {static_cast<fitness_t::value_type>(-average_error)};
74/// \param[in] prg program (individual/team) used for fitness evaluation
75/// \return the fitness (greater is better, max is `0`)
77template<class T, class ERRF, class DAT>
78fitness_t sum_of_errors_evaluator<T, ERRF, DAT>::operator()(const T &prg)
80 return sum_of_errors_impl(prg, 1);
84/// \param[in] prg program (individual/team) used for fitness evaluation
85/// \return the fitness (greater is better, max is `0`)
87/// This function is similar to operator()() but will skip 4 out of 5
88/// training instances, so it's faster.
90template<class T, class ERRF, class DAT>
91fitness_t sum_of_errors_evaluator<T, ERRF, DAT>::fast(const T &prg)
93 Expects(std::distance(this->dat_->begin(), this->dat_->end()) >= 100);
94 return sum_of_errors_impl(prg, 5);
98/// \param[in] prg program(individual/team) to be transformed in a lambda
100/// \return the lambda function associated with `prg` (`nullptr` in case
103template<class T, class LAMBDA, class DAT>
104std::unique_ptr<basic_lambda_f>
105sum_of_errors_evaluator<T, LAMBDA, DAT>::lambdify(const T &prg) const
107 return std::make_unique<basic_reg_lambda_f<T, true>>(prg);
111/// Sets up the environment for error measurement.
113/// \param[in] prg the program to be measured
116mae_error_functor<T>::mae_error_functor(const T &prg) : agent_(prg)
121/// \param[in] example current training case
122/// \return a measurement of the error of the model/program on the
123/// given training case (value in the `[0;+inf[` range)
126double mae_error_functor<T>::operator()(const dataframe::example &example) const
128 if (const auto model_value = agent_(example); has_value(model_value))
129 return std::fabs(lexical_cast<D_DOUBLE>(model_value)
130 - label_as<D_DOUBLE>(example));
132 return std::numeric_limits<double>::max() / 100.0;
136/// Sets up the environment for error measurement.
138/// \param[in] prg the program to be measured
141rmae_error_functor<T>::rmae_error_functor(const T &prg) : agent_(prg)
146/// \param[in] example current training case
147/// \return measurement of the error of the model/program on the
148/// current training case. The value returned is in the
152double rmae_error_functor<T>::operator()(
153 const dataframe::example &example) const
157 if (const auto model_value = agent_(example); has_value(model_value))
159 const auto approx(lexical_cast<D_DOUBLE>(model_value));
160 const auto target(label_as<D_DOUBLE>(example));
162 const auto delta(std::fabs(target - approx));
164 // Check if the numbers are really close. Needed when comparing numbers
166 if (delta <= 10.0 * std::numeric_limits<D_DOUBLE>::min())
169 err = 200.0 * delta / (std::fabs(approx) + std::fabs(target));
170 // Some alternatives for the error:
171 // * delta / std::max(approx, target)
172 // * delta / std::fabs(target)
174 // The chosen formula seems numerically more stable and gives a result
175 // in a limited range of values.
182/// Sets up the environment for error measurement.
184/// \param[in] prg the program to be measured
187mse_error_functor<T>::mse_error_functor(const T &prg) : agent_(prg)
192/// \param[in] example current training case
193/// \return a measurement of the error of the model/program on the
194/// the current training case. The value returned is in the
198double mse_error_functor<T>::operator()(const dataframe::example &example) const
200 if (const auto model_value = agent_(example); has_value(model_value))
202 const double err(lexical_cast<D_DOUBLE>(model_value)
203 - label_as<D_DOUBLE>(example));
207 return std::numeric_limits<double>::max() / 100.0;
211/// Sets up the environment for error measurement.
213/// \param[in] prg the program to be measured
216count_error_functor<T>::count_error_functor(const T &prg) : agent_(prg)
221/// \param[in] example current training case
222/// \return a measurement of the error of the model/program on the
223/// current training case. The value returned is in the
227double count_error_functor<T>::operator()(
228 const dataframe::example &example) const
230 const auto model_value(agent_(example));
232 const bool err(!has_value(model_value)
233 || !issmall(lexical_cast<D_DOUBLE>(model_value)
234 - label_as<D_DOUBLE>(example)));
236 return err ? 1.0 : 0.0;
240/// \param[in] d current dataset
241/// \param[in] x_slot basic parameter for the Slotted Dynamic Class Boundary
242/// Determination algorithm
245dyn_slot_evaluator<T>::dyn_slot_evaluator(dataframe &d, unsigned x_slot)
246 : classification_evaluator<T>(d), x_slot_(x_slot)
252/// \param[in] ind program used for class recognition
253/// \return the fitness (greater is better, max is `0`)
256fitness_t dyn_slot_evaluator<T>::operator()(const T &ind)
258 basic_dyn_slot_lambda_f<T, false, false> lambda(ind, *this->dat_, x_slot_);
260 fitness_t::value_type err(0.0);
261 for (auto &example : *this->dat_)
262 if (lambda.tag(example).label != label(example))
265 ++example.difficulty;
270 // The following code is faster but doesn't work for teams and doesn't
271 // "cooperate" with DSS.
273 // basic_dyn_slot_lambda_f<T,false,false> lambda(ind, *this->dat_, x_slot_);
274 // return {100.0 * (lambda.training_accuracy() - 1.0)};
278/// \param[in] ind individual to be transformed in a lambda function
279/// \return the lambda function associated with `ind` (`nullptr` in case
283std::unique_ptr<basic_lambda_f> dyn_slot_evaluator<T>::lambdify(
286 return std::make_unique<dyn_slot_lambda_f<T>>(ind, *this->dat_, x_slot_);
290/// \param[in] ind program used for class recognition
291/// \return the fitness (greater is better, max is `0`)
293/// For details about this algorithm see:
294/// * "Using Gaussian Distribution to Construct Fitnesss Functions in Genetic
295/// Programming for Multiclass Object Classification" - Mengjie Zhang, Will
296/// Smart (december 2005).
299fitness_t gaussian_evaluator<T>::operator()(const T &ind)
301 assert(this->dat_->classes() >= 2);
303 basic_gaussian_lambda_f<T, false, false> lambda(ind, *this->dat_);
305 fitness_t::value_type d(0.0);
306 for (auto &example : *this->dat_)
307 if (const auto res = lambda.tag(example); res.label == label(example))
309 const auto scale(static_cast<fitness_t::value_type>(this->dat_->classes()
312 // * `(1.0 - res.sureness)` is the sum of the errors;
313 // * `(res.sureness - 1.0)` is the opposite (standardized fitness);
314 // * `(res.sureness - 1.0) / scale` is the opposite of the average error.
315 d += (res.sureness - 1.0) / scale;
320 // * the maximum single class error is 1.0;
321 // * the maximum average class error is `1.0 / dat_->classes()`;
322 // So -1.0 is like to say that we have a complete failure.
325 ++example.difficulty;
332/// \param[in] ind individual to be transformed in a lambda function
333/// \return the lambda function associated with `ind` (`nullptr` in case
337std::unique_ptr<basic_lambda_f> gaussian_evaluator<T>::lambdify(
340 return std::make_unique<gaussian_lambda_f<T>>(ind, *this->dat_);
344/// \param[in] ind an individual
345/// \return the fitness of `ind` (greater is better, max is `0`)
348fitness_t binary_evaluator<T>::operator()(const T &ind)
350 Expects(this->dat_->classes() == 2);
352 basic_binary_lambda_f<T, false, false> agent(ind, *this->dat_);
353 fitness_t::value_type err(0.0);
355 for (auto &example : *this->dat_)
356 if (label(example) != agent.tag(example).label)
358 ++example.difficulty;
361 // err += std::fabs(val);
368/// \param[in] ind individual to be transformed in a lambda function
369/// \return the lambda function associated with `ind` (`nullptr` in case
373std::unique_ptr<basic_lambda_f> binary_evaluator<T>::lambdify(
376 return std::make_unique<binary_lambda_f<T>>(ind, *this->dat_);
379#endif // include guard