3 * \remark This file is part of VITA.
5 * \copyright Copyright (C) 2013-2020 EOS di Manlio Morini.
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
10 * You can obtain one at http://mozilla.org/MPL/2.0/
13#if !defined(VITA_LAMBDA_F_H)
14# error "Don't include this file directly, include the specific .h instead"
17#if !defined(VITA_LAMBDA_F_TCC)
18#define VITA_LAMBDA_F_TCC
20template<class T, bool S>
21const std::string basic_reg_lambda_f<T, S>::SERIALIZE_ID(
22 is_team<T>() ? "TEAM_REG_LAMBDA_F" : "REG_LAMBDA_F");
24template<class T, bool S, bool N>
25const std::string basic_dyn_slot_lambda_f<T, S, N>::SERIALIZE_ID(
28template<class T, bool S, bool N>
29const std::string basic_gaussian_lambda_f<T, S, N>::SERIALIZE_ID(
32template<class T, bool S, bool N>
33const std::string basic_binary_lambda_f<T, S, N>::SERIALIZE_ID(
36template<class T, bool S, bool N, template<class, bool, bool> class L,
38const std::string team_class_lambda_f<T, S, N, L, C>::SERIALIZE_ID(
39 "TEAM_" + L<T, S, N>::SERIALIZE_ID);
42/// \param[in] prg the program (individual/team) to be lambdified
44template<class T, bool S>
45basic_reg_lambda_f<T, S>::basic_reg_lambda_f(const T &prg)
46 : detail::reg_lambda_f_storage<T, S>(prg)
52/// \param[in] in input stream
53/// \param[in] ss active symbol set
55template<class T, bool S>
56basic_reg_lambda_f<T, S>::basic_reg_lambda_f(std::istream &in,
58 : detail::reg_lambda_f_storage<T, S>(in, ss)
60 static_assert(S, "reg_lambda_f requires storage for de-serialization");
66/// \param[in] e input example for the lambda function
67/// \return the output value associated with `e`
69template<class T, bool S>
70value_t basic_reg_lambda_f<T, S>::operator()(const dataframe::example &e) const
72 // Here using *tag dispatching by instance*: main function delegates to an
73 // implementation function that receives standard arguments plus a dummy
74 // argument based on a compile-time condition.
75 // Usually this is much easier to debug and get right that the
76 // `std::enable_if` solution.
77 // Moreover this is almost guaranteed to be optimized away by a decent
79 return eval(e, is_team<T>());
82template<class T, bool S>
83value_t basic_reg_lambda_f<T, S>::eval(const dataframe::example &e,
84 std::false_type) const
86 return this->run(e.input);
89template<class T, bool S>
90value_t basic_reg_lambda_f<T, S>::eval(const dataframe::example &e,
93 D_DOUBLE avg(0), count(0);
95 // Calculate the running average.
96 for (const auto &core : this->team_)
98 const auto res(core.run(e.input));
101 avg += (lexical_cast<D_DOUBLE>(res) - avg) / ++count;
111/// \return a *failed* status
113/// \warning This function is useful only for classification tasks.
115template<class T, bool S>
116classification_result basic_reg_lambda_f<T, S>::tag(
117 const dataframe::example &) const
123/// \param[in] a value produced by basic_lambda_f::operator()
124/// \return the string version of `a`
126template<class T, bool S>
127std::string basic_reg_lambda_f<T, S>::name(const value_t &a) const
129 return lexical_cast<std::string>(a);
133/// Calls (dynamic dispatch) polymhorphic model_metric `m` on `this`.
135/// \param[in] m a metric we are evaluating
136/// \param[in] d a dataset
137/// \return the value of `this` according to metric `m`
139template<class T, bool S>
140double basic_reg_lambda_f<T, S>::measure(const model_metric &m,
141 const dataframe &d) const
147/// \return `true` if the object passes the internal consistency check
149template<class T, bool S>
150bool basic_reg_lambda_f<T, S>::is_valid() const
152 return detail::reg_lambda_f_storage<T, S>::is_valid();
156/// Saves the object on persistent storage.
158/// \param[out] out output stream
159/// \return `true` if lambda was saved correctly
161template<class T, bool S>
162bool basic_reg_lambda_f<T, S>::save(std::ostream &out) const
164 return detail::reg_lambda_f_storage<T, S>::save(out);
168/// \param[in] d the training set
171basic_class_lambda_f<N>::basic_class_lambda_f(const dataframe &d)
172 : detail::class_names<N>(d)
177/// \param[in] e example to be classified
178/// \return the label of the class that includes `e`
181value_t basic_class_lambda_f<N>::operator()(const dataframe::example &e) const
183 return static_cast<D_INT>(this->tag(e).label);
187/// Calls (dynamic dispatch) polymhorphic model_metric `m` on `this`.
189/// \param[in] m a metric we are evaluating
190/// \param[in] d a dataset
191/// \return the value of `this` according to metric `m`
194double basic_class_lambda_f< N>::measure(const model_metric &m,
195 const dataframe &d) const
201/// \param[in] a id of a class
202/// \return the name of class `a`
205std::string basic_class_lambda_f<N>::name(const value_t &a) const
207 return detail::class_names<N>::string(a);
211/// \param[in] ind individual "to be transformed" into a lambda function
212/// \param[in] d the training set
213/// \param[in] x_slot number of slots for each class of the training set
215template<class T, bool S, bool N>
216basic_dyn_slot_lambda_f<T, S, N>::basic_dyn_slot_lambda_f(const T &ind,
219 : basic_class_lambda_f<N>(d), lambda_(ind),
220 slot_matrix_(d.classes() * x_slot, d.classes()),
221 slot_class_(d.classes() * x_slot), dataset_size_(0)
223 Expects(d.classes() > 1);
226 fill_matrix(d, x_slot);
232/// Constructs the object reading data from an input stream.
234/// \param[in] in input stream
235/// \param[in] ss active symbol set
237template<class T, bool S, bool N>
238basic_dyn_slot_lambda_f<T, S, N>::basic_dyn_slot_lambda_f(std::istream &in,
239 const symbol_set &ss)
240 : basic_class_lambda_f<N>(), lambda_(in, ss), slot_matrix_(), slot_class_(),
244 S, "dyn_slot_lambda_f requires storage space for de-serialization");
246 if (!slot_matrix_.load(in))
247 throw exception::data_format(
248 "Cannot read dyn_slot_lambda_f matrix component");
250 std::generate_n(std::back_inserter(slot_class_), slot_matrix_.rows(),
255 throw exception::data_format(
256 "Cannot read dyn_slot_lambda_f slot_class component");
260 if (!(in >> dataset_size_))
261 throw exception::data_format(
262 "Cannot read dyn_slot_lambda_f dataset_size component");
264 if (!detail::class_names<N>::load(in))
265 throw exception::data_format(
266 "Cannot read dyn_slot_lambda_f class_names component");
272/// Sets up the data structures needed by the 'dynamic slot' algorithm.
274/// \param[in] d the training set
275/// \param[in] x_slot number of slots for each class of the training set
277template<class T, bool S, bool N>
278void basic_dyn_slot_lambda_f<T, S, N>::fill_matrix(dataframe &d,
281 Expects(d.classes() > 1);
284 const auto n_slots(d.classes() * x_slot);
285 assert(n_slots == slot_matrix_.rows());
286 assert(slot_matrix_.cols() == d.classes());
288 // Here starts the slot-filling task.
289 slot_matrix_.fill(0);
291 // In the first step this method evaluates the program to obtain an output
292 // value for each training example. Based on the program output a
293 // bi-dimensional matrix is built (slot_matrix_(slot, class)).
294 for (const auto &example : d)
298 ++slot_matrix_(slot(example), label(example));
301 const auto unknown(d.classes());
303 // In the second step the method dynamically determine which class each
304 // slot belongs to by simply taking the class with the largest value at the
306 for (auto i(decltype(n_slots){0}); i < n_slots; ++i)
308 const auto cols(slot_matrix_.cols());
309 auto best_class(decltype(cols){0}); // Initially assuming class 0 as best
311 // ...then looking for a better class among the remaining ones.
312 for (auto j(decltype(cols){1}); j < cols; ++j)
313 if (slot_matrix_(i, j) >= slot_matrix_(i, best_class))
316 slot_class_[i] = slot_matrix_(i, best_class) ? best_class : unknown;
319 // Unknown slots can be a problem with new examples (not contained in the
320 // training set). We arbitrary assign them to the class of a neighbour
321 // slot (if available).
322 // Another interesting strategy would be to assign unknown slots to the
324 for (auto i(decltype(n_slots){0}); i < n_slots; ++i)
325 if (slot_class_[i] == unknown)
327 if (i && slot_class_[i - 1] != unknown)
328 slot_class_[i] = slot_class_[i - 1];
329 else if (i + 1 < n_slots && slot_class_[i + 1] != unknown)
330 slot_class_[i] = slot_class_[i + 1];
337/// \param[in] e input data
338/// \return the slot example `e` falls into
340template<class T, bool S, bool N>
341std::size_t basic_dyn_slot_lambda_f<T,S,N>::slot(
342 const dataframe::example &e) const
344 const auto res(lambda_(e));
346 const auto ns(slot_matrix_.rows());
347 const auto last_slot(ns - 1);
351 const auto val(lexical_cast<D_DOUBLE>(res));
352 const auto where(discretization(val, last_slot));
354 return (where >= ns) ? last_slot : where;
358/// \return the accuracy of the lambda function on the training set
360template<class T, bool S, bool N>
361double basic_dyn_slot_lambda_f<T, S, N>::training_accuracy() const
365 const auto slots(slot_matrix_.rows());
367 for (auto i(decltype(slots){0}); i < slots; ++i)
368 ok += slot_matrix_(i, slot_class_[i]);
370 assert(dataset_size_ >= ok);
372 return ok / dataset_size_;
376/// \param[in] instance data to be classified
377/// \return the class of `instance` (numerical id) and the
378/// confidence level (in the range `[0,1]`)
380template<class T, bool S, bool N>
381classification_result basic_dyn_slot_lambda_f<T, S, N>::tag(
382 const dataframe::example &instance) const
384 const auto s(slot(instance));
385 const auto classes(slot_matrix_.cols());
388 for (auto j(decltype(classes){0}); j < classes; ++j)
389 total += slot_matrix_(s, j);
391 const auto ok(slot_matrix_(s, slot_class_[s]));
393 const double confidence(!total ? 0.5 : static_cast<double>(ok) / total);
395 return {slot_class_[s], confidence};
399/// Saves the lambda on persistent storage.
401/// \param[out] out output stream
402/// \return `true` on success
404template<class T, bool S, bool N>
405bool basic_dyn_slot_lambda_f<T, S, N>::save(std::ostream &out) const
407 if (!lambda_.save(out))
410 if (!slot_matrix_.save(out))
413 // Don't need to save slot_class_.size() since it's equal to
414 // slot_matrix_.rows()
415 for (auto s : slot_class_)
416 if (!(out << s << '\n'))
419 if (!(out << dataset_size_ << '\n'))
422 return detail::class_names<N>::save(out);
426/// \return `true` if the object passes the internal consistency check
428template<class T, bool S, bool N>
429bool basic_dyn_slot_lambda_f<T, S, N>::is_valid() const
431 if (slot_matrix_.cols() <= 1) // too few classes
434 if (slot_matrix_.rows() != slot_class_.size())
441/// \param[in] ind individual "to be transformed" into a lambda function
442/// \param[in] d the training set
444template<class T, bool S, bool N>
445basic_gaussian_lambda_f<T, S, N>::basic_gaussian_lambda_f(const T &ind,
447 : basic_class_lambda_f<N>(d), lambda_(ind), gauss_dist_(d.classes())
449 Expects(d.classes() > 1);
457/// Constructs the object reading data from an input stream.
459/// \param[in] in input stream
460/// \param[in] ss active symbol set
462template<class T, bool S, bool N>
463basic_gaussian_lambda_f<T, S, N>::basic_gaussian_lambda_f(std::istream &in,
464 const symbol_set &ss)
465 : basic_class_lambda_f<N>(), lambda_(in, ss), gauss_dist_()
468 S, "gaussian_lambda_f requires storage space for de-serialization");
470 typename decltype(gauss_dist_)::size_type n;
472 throw exception::data_format(
473 "Cannot read gaussian_lambda_f size component");
475 for (decltype(n) i(0); i < n; ++i)
477 distribution<number> d;
479 throw exception::data_format(
480 "Cannot read gaussian_lambda_f distribution component");
482 gauss_dist_.push_back(d);
485 if (!detail::class_names<N>::load(in))
486 throw exception::data_format(
487 "Cannot read gaussian_lambda_f class_names component");
493/// Sets up the data structures needed by the gaussian algorithm.
495/// \param[in] d the training set
497template<class T, bool S, bool N>
498void basic_gaussian_lambda_f<T, S, N>::fill_vector(dataframe &d)
500 Expects(d.classes() > 1);
502 // For a set of training data, we assume that the behaviour of a program
503 // classifier is modelled using multiple Gaussian distributions, each of
504 // which corresponds to a particular class. The distribution of a class is
505 // determined by evaluating the program on the examples of the class in
506 // the training set. This is done by taking the mean and standard deviation
507 // of the program outputs for those training examples for that class.
508 for (const auto &example : d)
510 const auto res(lambda_(example));
512 number val(has_value(res) ? lexical_cast<D_DOUBLE>(res) : 0.0);
513 const number cut(10000000.0);
519 gauss_dist_[label(example)].add(val);
524/// \param[in] example input value whose class we are interested in
525/// \return the class of `example` (numerical id) and the confidence
526/// level (how sure you can be that `example` is properly
527/// classified. The value is in the `[0,1]` interval and the
528/// sum of all the confidence levels of each class equals
531template<class T, bool S, bool N>
532classification_result basic_gaussian_lambda_f<T, S, N>::tag(
533 const dataframe::example &example) const
535 const auto res(lambda_(example));
536 const number x(has_value(res) ? lexical_cast<D_DOUBLE>(res) : 0.0);
538 number val_(0.0), sum_(0.0);
539 class_t probable_class(0);
541 const auto classes(static_cast<unsigned>(gauss_dist_.size()));
542 for (auto i(decltype(classes){0}); i < classes; ++i)
544 const number distance(std::fabs(x - gauss_dist_[i].mean()));
545 const number variance(gauss_dist_[i].variance());
548 if (issmall(variance)) // These are borderline cases
549 if (issmall(distance)) // These are borderline cases
553 else // This is the standard case
554 p = std::exp(-distance * distance / variance);
565 // Normalized confidence value.
566 // Do not change sum_ > 0.0 with
567 // - issmall(sum_) => when sum_ is small, val_ is smaller and the division
569 // - sum_ => it's the same thing but will produce a warning with
571 const double confidence(sum_ > 0.0 ? val_ / sum_ : 0.0);
573 return {probable_class, confidence};
577/// Saves the lambda on persistent storage.
579/// \param[out] out output stream
580/// \return `true` on success
582template<class T, bool S, bool N>
583bool basic_gaussian_lambda_f<T, S, N>::save(std::ostream &out) const
585 if (!lambda_.save(out))
588 if (!(out << gauss_dist_.size() << '\n'))
591 for (const auto &g : gauss_dist_)
595 return detail::class_names<N>::save(out);
599/// \return `true` if the object passes the internal consistency check
601template<class T, bool S, bool N>
602bool basic_gaussian_lambda_f<T, S, N>::is_valid() const
608/// Constructs the object reading data from an input stream.
610/// \param[in] ind individual "to be transformed" into a lambda function
611/// \param[in] d the training set
613template<class T, bool S, bool N>
614basic_binary_lambda_f<T, S, N>::basic_binary_lambda_f(const T &ind,
616 : basic_class_lambda_f<N>(d), lambda_(ind)
618 Expects(d.classes() == 2);
624/// \param[in] in input stream
625/// \param[in] ss active symbol set
627template<class T, bool S, bool N>
628basic_binary_lambda_f<T, S, N>::basic_binary_lambda_f(std::istream &in,
629 const symbol_set &ss)
630 : basic_class_lambda_f<N>(), lambda_(in, ss)
633 S, "binary_lambda_f requires storage space for de-serialization");
635 if (!detail::class_names<N>::load(in))
636 throw exception::data_format(
637 "Cannot read binary_lambda_f class_names component");
643/// \param[in] e input example for the lambda function
644/// \return the class of `e` (numerical id) and the confidence level (in
645/// the `[0,1]` interval)
647template<class T, bool S, bool N>
648classification_result basic_binary_lambda_f<T, S, N>::tag(
649 const dataframe::example &e) const
651 const auto res(lambda_(e));
652 const number val(has_value(res) ? lexical_cast<D_DOUBLE>(res) : 0.0);
654 return {val > 0.0 ? 1u : 0u, std::fabs(val)};
658/// \return `true` if the object passes the internal consistency check
660template<class T, bool S, bool N>
661bool basic_binary_lambda_f<T, S, N>::is_valid() const
667/// Saves the lambda on persistent storage.
669/// \param[out] out output stream
670/// \return `true` on success
672template<class T, bool S, bool N>
673bool basic_binary_lambda_f<T, S, N>::save(std::ostream &out) const
675 if (!lambda_.save(out))
678 return detail::class_names<N>::save(out);
682/// \param[in] t team "to be transformed" into a lambda function
683/// \param[in] d the training set
684/// \param[in] args auxiliary parameters for the specific lambda function
686template<class T, bool S, bool N, template<class, bool, bool> class L,
688template<class... Args>
689team_class_lambda_f<T, S, N, L, C>::team_class_lambda_f(const team<T> &t,
692 : basic_class_lambda_f<N>(d), classes_(d.classes())
694 team_.reserve(t.individuals());
695 for (const auto &ind : t)
696 team_.emplace_back(ind, d, std::forward<Args>(args)...);
700/// Constructs the object reading data from an input stream.
702/// \param[in] in input stream
703/// \param[in] ss active symbol set
705template<class T, bool S, bool N, template<class, bool, bool> class L,
707team_class_lambda_f<T, S, N, L, C>::team_class_lambda_f(std::istream &in,
708 const symbol_set &ss)
709 : basic_class_lambda_f<N>(), classes_()
712 S, "team_class_lambda_f requires storage space for de-serialization");
714 if (!(in >> classes_))
715 throw exception::data_format("Cannot read number of classes");
717 typename decltype(team_)::size_type s;
719 throw exception::data_format("Cannot read team size");
722 for (unsigned i(0); i < s; ++i)
723 team_.emplace_back(in, ss);
725 if (!detail::class_names<N>::load(in))
726 throw exception::data_format("Cannot read class_names");
730/// Specialized method for teams.
732/// \param[in] instance data to be classified
733/// \return the class of `instance` (numerical id) and the
734/// confidence level (in the `0,1]` interval)
736/// * `team_composition::mv` the class which most of the individuals predict
737/// for a given example is selected as team output.
738/// * `team_composition::wta` the winner is the individual with the highest
739/// confidence in its decision. Specialization may emerge if different
740/// members of the team win this contest for different fitness cases (of
741/// curse, it is not a feasible alternative to select the member with the
742/// best fitness. Then a decision on unknown data is only possible if the
743/// right outputs are known in advance and is not made by the team itself).
745template<class T, bool S, bool N, template<class, bool, bool> class L,
747classification_result team_class_lambda_f<T, S, N, L, C>::tag(
748 const dataframe::example &instance) const
750 if constexpr (C == team_composition::wta)
752 const auto size(team_.size());
753 auto best(team_[0].tag(instance));
755 for (auto i(decltype(size){1}); i < size; ++i)
757 const auto res(team_[i].tag(instance));
759 if (res.sureness > best.sureness)
765 else if constexpr (C == team_composition::mv)
767 std::vector<unsigned> votes(classes_);
769 for (const auto &lambda : team_)
770 ++votes[lambda.tag(instance).label];
773 for (auto i(max + 1); i < classes_; ++i)
774 if (votes[i] > votes[max])
777 return {max, static_cast<double>(votes[max]) /
778 static_cast<double>(team_.size())};
783/// Saves the lambda team on persistent storage.
785/// \param[out] out output stream
786/// \return `true` on success
788template<class T, bool S, bool N, template<class, bool, bool> class L,
790bool team_class_lambda_f<T, S, N, L, C>::save(std::ostream &out) const
792 if (!(out << classes_ << '\n'))
795 if (!(out << team_.size() << '\n'))
798 for (const auto &i : team_)
802 return detail::class_names<N>::save(out);
806/// \return Class ID used for serialization.
808template<class T, bool S, bool N, template<class, bool, bool> class L,
810std::string team_class_lambda_f<T, S, N, L, C>::serialize_id() const
812 Expects(team_.size());
813 return "TEAM_" + L<T, S, N>::SERIALIZE_ID;
817/// \return `true` if the object passes the internal consistency check
819template<class T, bool S, bool N, template<class, bool, bool> class L,
821bool team_class_lambda_f<T, S, N, L, C>::is_valid() const
834using build_func = std::unique_ptr<basic_src_lambda_f> (*)(std::istream &,
838std::unique_ptr<basic_src_lambda_f> build(std::istream &in,
839 const symbol_set &ss)
841 return std::make_unique<U>(in, ss);
844extern std::map<std::string, build_func> factory_;
848/// Allows insertion of user defined classificators.
851bool insert(const std::string &id)
853 Expects(!id.empty());
854 return detail::factory_.insert({id, detail::build<U>}).second;
858std::unique_ptr<basic_src_lambda_f> load(std::istream &in,
859 const symbol_set &ss)
861 if (detail::factory_.find(reg_lambda_f<T>::SERIALIZE_ID)
862 == detail::factory_.end())
864 insert<reg_lambda_f<T>>(reg_lambda_f<T>::SERIALIZE_ID);
865 insert<dyn_slot_lambda_f<T>>(dyn_slot_lambda_f<T>::SERIALIZE_ID);
866 insert<gaussian_lambda_f<T>>(gaussian_lambda_f<T>::SERIALIZE_ID);
867 insert<binary_lambda_f<T>>(binary_lambda_f<T>::SERIALIZE_ID);
874 const auto iter(detail::factory_.find(id));
875 if (iter != detail::factory_.end())
876 return iter->second(in, ss);
883} // namespace serialize
885#endif // include guard