3 * \remark This file is part of VITA.
5 * \copyright Copyright (C) 2014-2020 EOS di Manlio Morini.
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
10 * You can obtain one at http://mozilla.org/MPL/2.0/
13#if !defined(VITA_DISTRIBUTION_H)
14# error "Don't include this file directly, include the specific .h instead"
17#if !defined(VITA_DISTRIBUTION_TCC)
18#define VITA_DISTRIBUTION_TCC
21/// Just the initial setup.
24distribution<T>::distribution() : seen_(), m2_(), max_(), mean_(), min_(),
30/// Resets gathered statics.
33void distribution<T>::clear()
35 *this = distribution();
39/// \return Number of elements of the distribution.
42std::uintmax_t distribution<T>::count() const
48/// \return The maximum value of the distribution
51T distribution<T>::max() const
58/// \return The minimum value of the distribution
61T distribution<T>::min() const
68/// \return The mean value of the distribution
71T distribution<T>::mean() const
78/// \return The variance of the distribution
81T distribution<T>::variance() const
84 return m2_ / static_cast<double>(count());
88/// Add a new value to the distribution.
90/// \param[in] val new value upon which statistics are recalculated
92/// \remark Function ignores NAN values.
96void distribution<T>::add(U val)
100 if constexpr (std::is_floating_point_v<U>)
104 const auto v1(static_cast<T>(val));
106 if constexpr (std::is_floating_point_v<T>)
111 min_ = max_ = mean_ = v1;
119 ++seen_[round_to(v1)];
125const std::map<T, std::uintmax_t> &distribution<T>::seen() const
131/// \return the entropy of the distribution.
133/// \f$H(X)=-\sum_{i=1}^n p(x_i) \dot log_b(p(x_i))\f$
134/// We use an offline algorithm
135/// (http://en.wikipedia.org/wiki/Online_algorithm).
138double distribution<T>::entropy() const
140 const double c(1.0 / std::log(2.0));
143 for (const auto &f : seen()) // f.first: fitness, f.second: sightings
145 const auto p(static_cast<double>(f.second) / static_cast<double>(count()));
147 h -= p * std::log(p) * c;
154/// \param[in] val new value upon which statistics are recalculated.
156/// Calculate running variance and cumulative average of a set. The
157/// algorithm used is due to Knuth (Donald E. Knuth - The Art of Computer
158/// Programming, volume 2: Seminumerical Algorithms, 3rd edn., p. 232.
162/// * <http://en.wikipedia.org/wiki/Online_algorithm>
163/// * <http://en.wikipedia.org/wiki/Moving_average#Cumulative_moving_average>
166void distribution<T>::update_variance(T val)
170 const auto c1(static_cast<double>(count()));
172 const T delta(val - mean());
175 // This expression uses the new value of mean.
177 m2_ += delta * (val - mean());
179 m2_ = delta * (val - mean());
183/// \return the standard deviation of the distribution.
186T distribution<T>::standard_deviation() const
188 // This way, for "regular" types we'll use std::sqrt ("taken in" by the
189 // using statement), while for our types the overload will prevail due to
190 // Koenig lookup (<http://www.gotw.ca/gotw/030.htm>).
193 return sqrt(variance());
197/// \param[out] out output stream.
198/// \return true on success.
200/// Saves the distribution on persistent storage.
203bool distribution<T>::save(std::ostream &out) const
207 out << count() << '\n'
208 << std::fixed << std::scientific
209 << std::setprecision(std::numeric_limits<T>::digits10 + 1)
215 out << seen().size() << '\n';
216 for (const auto &elem : seen())
217 out << elem.first << ' ' << elem.second << '\n';
223/// \param[in] in input stream.
224/// \return true on success.
226/// Loads the distribution from persistent storage.
229/// If the load operation isn't successful the current object isn't modified.
232bool distribution<T>::load(std::istream &in)
240 in >> std::fixed >> std::scientific
241 >> std::setprecision(std::numeric_limits<T>::digits10 + 1);
259 typename decltype(seen_)::size_type n;
264 for (decltype(n) i(0); i < n; ++i)
266 typename decltype(seen_)::key_type key;
267 typename decltype(seen_)::mapped_type val;
268 if (!(in >> key >> val))
285/// \return `true` if the object passes the internal consistency check.
288bool distribution<T>::is_valid() const
290 // This way, for "regular" types we'll use std::infinite / std::isnan
291 // ("taken in" by the using statement), while for our types the overload
292 // will prevail due to Koenig lookup (<http://www.gotw.ca/gotw/030.htm>).
296 if (count() && isfinite(min()) && isfinite(mean()) && min() > mean())
298 vitaERROR << "Distribution: min=" << min() << " > mean=" << mean();
302 if (count() && isfinite(max()) && isfinite(mean()) && max() < mean())
304 vitaERROR << "Distribution: max=" << max() << " < mean=" << mean();
308 if (count() && (isnan(variance()) || !isnonnegative(variance())))
310 vitaERROR << "Distribution: negative variance";
316#endif // Include guard