Vita
gp/src/evaluator.tcc
1/**
2 * \file
3 * \remark This file is part of VITA.
4 *
5 * \copyright Copyright (C) 2011-2023 EOS di Manlio Morini.
6 *
7 * \license
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
10 * You can obtain one at http://mozilla.org/MPL/2.0/
11 */
12
13#if !defined(VITA_SRC_EVALUATOR_H)
14# error "Don't include this file directly, include the specific .h instead"
15#endif
16
17#if !defined(VITA_SRC_EVALUATOR_TCC)
18#define VITA_SRC_EVALUATOR_TCC
19
20///
21/// \param[in] d dataset that the evaluator will use
22///
23template<class T, class DAT>
24src_evaluator<T, DAT>::src_evaluator(DAT &d) : dat_(&d)
25{
26}
27
28///
29/// \param[in] d the training dataset
30///
31template<class T, class ERRF, class DAT>
32sum_of_errors_evaluator<T, ERRF, DAT>::sum_of_errors_evaluator(DAT &d)
33 : src_evaluator<T, DAT>(d)
34{
35}
36
37///
38/// Sums the error reported by the error functor over a training set.
39///
40/// \param[in] prg program (individual/team) used for fitness evaluation
41/// \param[in] step consider just `1` example every `step`
42/// \return the fitness (greater is better, max is `0`)
43///
44template<class T, class ERRF, class DAT>
45fitness_t sum_of_errors_evaluator<T, ERRF, DAT>::sum_of_errors_impl(
46 const T &prg, unsigned step)
47{
48 Expects(this->dat_->begin() != this->dat_->end());
49 Expects(!detail::classes(this->dat_));
50
51 const ERRF err_fctr(prg);
52
53 double average_error(0.0), n(0.0);
54 for (auto it(std::begin(*this->dat_));
55 std::distance(it, std::end(*this->dat_)) >= step;
56 std::advance(it, step))
57 {
58 const auto err(err_fctr(*it));
59
60 // User specified examples could not support difficulty.
61 if constexpr (detail::has_difficulty_v<DAT>)
62 if (!issmall(err))
63 ++it->difficulty;
64
65 average_error += (err - average_error) / ++n;
66 }
67
68 // Note that we take the average error: this way fast() and operator()
69 // outputs can be compared.
70 return {static_cast<fitness_t::value_type>(-average_error)};
71}
72
73///
74/// \param[in] prg program (individual/team) used for fitness evaluation
75/// \return the fitness (greater is better, max is `0`)
76///
77template<class T, class ERRF, class DAT>
78fitness_t sum_of_errors_evaluator<T, ERRF, DAT>::operator()(const T &prg)
79{
80 return sum_of_errors_impl(prg, 1);
81}
82
83///
84/// \param[in] prg program (individual/team) used for fitness evaluation
85/// \return the fitness (greater is better, max is `0`)
86///
87/// This function is similar to operator()() but will skip 4 out of 5
88/// training instances, so it's faster.
89///
90template<class T, class ERRF, class DAT>
91fitness_t sum_of_errors_evaluator<T, ERRF, DAT>::fast(const T &prg)
92{
93 Expects(std::distance(this->dat_->begin(), this->dat_->end()) >= 100);
94 return sum_of_errors_impl(prg, 5);
95}
96
97///
98/// \param[in] prg program(individual/team) to be transformed in a lambda
99/// function
100/// \return the lambda function associated with `prg` (`nullptr` in case
101/// of errors).
102///
103template<class T, class LAMBDA, class DAT>
104std::unique_ptr<basic_lambda_f>
105sum_of_errors_evaluator<T, LAMBDA, DAT>::lambdify(const T &prg) const
106{
107 return std::make_unique<basic_reg_lambda_f<T, true>>(prg);
108}
109
110///
111/// Sets up the environment for error measurement.
112///
113/// \param[in] prg the program to be measured
114///
115template<class T>
116mae_error_functor<T>::mae_error_functor(const T &prg) : agent_(prg)
117{
118}
119
120///
121/// \param[in] example current training case
122/// \return a measurement of the error of the model/program on the
123/// given training case (value in the `[0;+inf[` range)
124///
125template<class T>
126double mae_error_functor<T>::operator()(const dataframe::example &example) const
127{
128 if (const auto model_value = agent_(example); has_value(model_value))
129 return std::fabs(lexical_cast<D_DOUBLE>(model_value)
130 - label_as<D_DOUBLE>(example));
131
132 return std::numeric_limits<double>::max() / 100.0;
133}
134
135///
136/// Sets up the environment for error measurement.
137///
138/// \param[in] prg the program to be measured
139///
140template<class T>
141rmae_error_functor<T>::rmae_error_functor(const T &prg) : agent_(prg)
142{
143}
144
145///
146/// \param[in] example current training case
147/// \return measurement of the error of the model/program on the
148/// current training case. The value returned is in the
149/// `[0;200]` range
150///
151template<class T>
152double rmae_error_functor<T>::operator()(
153 const dataframe::example &example) const
154{
155 double err(200.0);
156
157 if (const auto model_value = agent_(example); has_value(model_value))
158 {
159 const auto approx(lexical_cast<D_DOUBLE>(model_value));
160 const auto target(label_as<D_DOUBLE>(example));
161
162 const auto delta(std::fabs(target - approx));
163
164 // Check if the numbers are really close. Needed when comparing numbers
165 // near zero.
166 if (delta <= 10.0 * std::numeric_limits<D_DOUBLE>::min())
167 err = 0.0;
168 else
169 err = 200.0 * delta / (std::fabs(approx) + std::fabs(target));
170 // Some alternatives for the error:
171 // * delta / std::max(approx, target)
172 // * delta / std::fabs(target)
173 //
174 // The chosen formula seems numerically more stable and gives a result
175 // in a limited range of values.
176 }
177
178 return err;
179}
180
181///
182/// Sets up the environment for error measurement.
183///
184/// \param[in] prg the program to be measured
185///
186template<class T>
187mse_error_functor<T>::mse_error_functor(const T &prg) : agent_(prg)
188{
189}
190
191///
192/// \param[in] example current training case
193/// \return a measurement of the error of the model/program on the
194/// the current training case. The value returned is in the
195/// `[0;+inf[` range
196///
197template<class T>
198double mse_error_functor<T>::operator()(const dataframe::example &example) const
199{
200 if (const auto model_value = agent_(example); has_value(model_value))
201 {
202 const double err(lexical_cast<D_DOUBLE>(model_value)
203 - label_as<D_DOUBLE>(example));
204 return err * err;
205 }
206
207 return std::numeric_limits<double>::max() / 100.0;
208}
209
210///
211/// Sets up the environment for error measurement.
212///
213/// \param[in] prg the program to be measured
214///
215template<class T>
216count_error_functor<T>::count_error_functor(const T &prg) : agent_(prg)
217{
218}
219
220///
221/// \param[in] example current training case
222/// \return a measurement of the error of the model/program on the
223/// current training case. The value returned is in the
224/// `[0;+inf[` range
225///
226template<class T>
227double count_error_functor<T>::operator()(
228 const dataframe::example &example) const
229{
230 const auto model_value(agent_(example));
231
232 const bool err(!has_value(model_value)
233 || !issmall(lexical_cast<D_DOUBLE>(model_value)
234 - label_as<D_DOUBLE>(example)));
235
236 return err ? 1.0 : 0.0;
237}
238
239///
240/// \param[in] d current dataset
241/// \param[in] x_slot basic parameter for the Slotted Dynamic Class Boundary
242/// Determination algorithm
243///
244template<class T>
245dyn_slot_evaluator<T>::dyn_slot_evaluator(dataframe &d, unsigned x_slot)
246 : classification_evaluator<T>(d), x_slot_(x_slot)
247{
248 assert(x_slot_);
249}
250
251///
252/// \param[in] ind program used for class recognition
253/// \return the fitness (greater is better, max is `0`)
254///
255template<class T>
256fitness_t dyn_slot_evaluator<T>::operator()(const T &ind)
257{
258 basic_dyn_slot_lambda_f<T, false, false> lambda(ind, *this->dat_, x_slot_);
259
260 fitness_t::value_type err(0.0);
261 for (auto &example : *this->dat_)
262 if (lambda.tag(example).label != label(example))
263 {
264 ++err;
265 ++example.difficulty;
266 }
267
268 return {-err};
269
270 // The following code is faster but doesn't work for teams and doesn't
271 // "cooperate" with DSS.
272 //
273 // basic_dyn_slot_lambda_f<T,false,false> lambda(ind, *this->dat_, x_slot_);
274 // return {100.0 * (lambda.training_accuracy() - 1.0)};
275}
276
277///
278/// \param[in] ind individual to be transformed in a lambda function
279/// \return the lambda function associated with `ind` (`nullptr` in case
280/// of errors)
281///
282template<class T>
283std::unique_ptr<basic_lambda_f> dyn_slot_evaluator<T>::lambdify(
284 const T &ind) const
285{
286 return std::make_unique<dyn_slot_lambda_f<T>>(ind, *this->dat_, x_slot_);
287}
288
289///
290/// \param[in] ind program used for class recognition
291/// \return the fitness (greater is better, max is `0`)
292///
293/// For details about this algorithm see:
294/// * "Using Gaussian Distribution to Construct Fitnesss Functions in Genetic
295/// Programming for Multiclass Object Classification" - Mengjie Zhang, Will
296/// Smart (december 2005).
297///
298template<class T>
299fitness_t gaussian_evaluator<T>::operator()(const T &ind)
300{
301 assert(this->dat_->classes() >= 2);
302
303 basic_gaussian_lambda_f<T, false, false> lambda(ind, *this->dat_);
304
305 fitness_t::value_type d(0.0);
306 for (auto &example : *this->dat_)
307 if (const auto res = lambda.tag(example); res.label == label(example))
308 {
309 const auto scale(static_cast<fitness_t::value_type>(this->dat_->classes()
310 - 1));
311 // Note:
312 // * `(1.0 - res.sureness)` is the sum of the errors;
313 // * `(res.sureness - 1.0)` is the opposite (standardized fitness);
314 // * `(res.sureness - 1.0) / scale` is the opposite of the average error.
315 d += (res.sureness - 1.0) / scale;
316 }
317 else
318 {
319 // Note:
320 // * the maximum single class error is 1.0;
321 // * the maximum average class error is `1.0 / dat_->classes()`;
322 // So -1.0 is like to say that we have a complete failure.
323 d -= 1.0;
324
325 ++example.difficulty;
326 }
327
328 return {d};
329}
330
331///
332/// \param[in] ind individual to be transformed in a lambda function
333/// \return the lambda function associated with `ind` (`nullptr` in case
334/// of errors)
335///
336template<class T>
337std::unique_ptr<basic_lambda_f> gaussian_evaluator<T>::lambdify(
338 const T &ind) const
339{
340 return std::make_unique<gaussian_lambda_f<T>>(ind, *this->dat_);
341}
342
343///
344/// \param[in] ind an individual
345/// \return the fitness of `ind` (greater is better, max is `0`)
346///
347template<class T>
348fitness_t binary_evaluator<T>::operator()(const T &ind)
349{
350 Expects(this->dat_->classes() == 2);
351
352 basic_binary_lambda_f<T, false, false> agent(ind, *this->dat_);
353 fitness_t::value_type err(0.0);
354
355 for (auto &example : *this->dat_)
356 if (label(example) != agent.tag(example).label)
357 {
358 ++example.difficulty;
359 ++err;
360
361 // err += std::fabs(val);
362 }
363
364 return {-err};
365}
366
367///
368/// \param[in] ind individual to be transformed in a lambda function
369/// \return the lambda function associated with `ind` (`nullptr` in case
370/// of errors)
371///
372template<class T>
373std::unique_ptr<basic_lambda_f> binary_evaluator<T>::lambdify(
374 const T &ind) const
375{
376 return std::make_unique<binary_lambda_f<T>>(ind, *this->dat_);
377}
378
379#endif // include guard