| /* |
| * Created by Joachim on 16/04/2019. |
| * Adapted from donated nonius code. |
| * |
| * Distributed under the Boost Software License, Version 1.0. (See accompanying |
| * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| */ |
| |
| // Statistical analysis tools |
| |
| #ifndef TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED |
| #define TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED |
| |
| #include "../catch_clock.hpp" |
| #include "../catch_estimate.hpp" |
| #include "../catch_outlier_classification.hpp" |
| |
| #include <algorithm> |
| #include <functional> |
| #include <vector> |
| #include <iterator> |
| #include <numeric> |
| #include <tuple> |
| #include <cmath> |
| #include <utility> |
| #include <cstddef> |
| #include <random> |
| |
| namespace Catch { |
| namespace Benchmark { |
| namespace Detail { |
| using sample = std::vector<double>; /* Sequence of double values; the return type of resample() and jackknife() in this header. */ |
| |
| double weighted_average_quantile(int k, int q, std::vector<double>::iterator first, std::vector<double>::iterator last); /* Declared here, defined elsewhere. classify_outliers() calls it with (k, q) = (1, 4) and (3, 4) and treats the results as the first and third quartiles of [first, last). NOTE(review): it is handed a private copy of the data there, presumably because it may reorder the range - confirm against the definition. */ |
| |
| template <typename Iterator> |
| OutlierClassification classify_outliers(Iterator first, Iterator last) { |
| std::vector<double> copy(first, last); |
| |
| auto q1 = weighted_average_quantile(1, 4, copy.begin(), copy.end()); |
| auto q3 = weighted_average_quantile(3, 4, copy.begin(), copy.end()); |
| auto iqr = q3 - q1; |
| auto los = q1 - (iqr * 3.); |
| auto lom = q1 - (iqr * 1.5); |
| auto him = q3 + (iqr * 1.5); |
| auto his = q3 + (iqr * 3.); |
| |
| OutlierClassification o; |
| for (; first != last; ++first) { |
| auto&& t = *first; |
| if (t < los) ++o.low_severe; |
| else if (t < lom) ++o.low_mild; |
| else if (t > his) ++o.high_severe; |
| else if (t > him) ++o.high_mild; |
| ++o.samples_seen; |
| } |
| return o; |
| } |
| |
// Arithmetic mean of the values in [first, last).
//
// The values are summed front to back into a double, and the sum is divided
// by the element count obtained from `last - first`.
template <typename Iterator>
double mean(Iterator first, Iterator last) {
    return std::accumulate(first, last, 0.) / static_cast<double>(last - first);
}
| |
| template <typename URng, typename Iterator, typename Estimator> |
| sample resample(URng& rng, int resamples, Iterator first, Iterator last, Estimator& estimator) { |
| auto n = last - first; |
| std::uniform_int_distribution<decltype(n)> dist(0, n - 1); |
| |
| sample out; |
| out.reserve(resamples); |
| std::generate_n(std::back_inserter(out), resamples, [n, first, &estimator, &dist, &rng] { |
| std::vector<double> resampled; |
| resampled.reserve(n); |
| std::generate_n(std::back_inserter(resampled), n, [first, &dist, &rng] { return first[dist(rng)]; }); |
| return estimator(resampled.begin(), resampled.end()); |
| }); |
| std::sort(out.begin(), out.end()); |
| return out; |
| } |
| |
| template <typename Estimator, typename Iterator> |
| sample jackknife(Estimator&& estimator, Iterator first, Iterator last) { |
| auto n = last - first; |
| auto second = std::next(first); |
| sample results; |
| results.reserve(n); |
| |
| for (auto it = first; it != last; ++it) { |
| std::iter_swap(it, first); |
| results.push_back(estimator(second, last)); |
| } |
| |
| return results; |
| } |
| |
// Cumulative distribution function of the standard normal distribution,
// written in terms of the complementary error function:
//   Phi(x) = erfc(-x / sqrt(2)) / 2
inline double normal_cdf(double x) {
    double const scaled = -x / std::sqrt(2.0);
    return 0.5 * std::erfc(scaled);
}
| |
| double erfc_inv(double x); /* Declared here, defined elsewhere. NOTE(review): going by its name, presumably the inverse of the complementary error function - confirm against the definition. */ |
| |
| double normal_quantile(double p); /* Declared here, defined elsewhere. bootstrap() passes it a probability and uses the results as offsets (bias, z1) that are later fed through normal_cdf(). NOTE(review): presumably the inverse of normal_cdf() - confirm against the definition. */ |
| |
| template <typename Iterator, typename Estimator> |
| Estimate<double> bootstrap(double confidence_level, Iterator first, Iterator last, sample const& resample, Estimator&& estimator) { |
| auto n_samples = last - first; |
| |
| double point = estimator(first, last); |
| // Degenerate case with a single sample |
| if (n_samples == 1) return { point, point, point, confidence_level }; |
| |
| sample jack = jackknife(estimator, first, last); |
| double jack_mean = mean(jack.begin(), jack.end()); |
| double sum_squares, sum_cubes; |
| std::tie(sum_squares, sum_cubes) = std::accumulate(jack.begin(), jack.end(), std::make_pair(0., 0.), [jack_mean](std::pair<double, double> sqcb, double x) -> std::pair<double, double> { |
| auto d = jack_mean - x; |
| auto d2 = d * d; |
| auto d3 = d2 * d; |
| return { sqcb.first + d2, sqcb.second + d3 }; |
| }); |
| |
| double accel = sum_cubes / (6 * std::pow(sum_squares, 1.5)); |
| int n = static_cast<int>(resample.size()); |
| double prob_n = std::count_if(resample.begin(), resample.end(), [point](double x) { return x < point; }) / (double)n; |
| // degenerate case with uniform samples |
| if (prob_n == 0) return { point, point, point, confidence_level }; |
| |
| double bias = normal_quantile(prob_n); |
| double z1 = normal_quantile((1. - confidence_level) / 2.); |
| |
| auto cumn = [n](double x) -> int { |
| return std::lround(normal_cdf(x) * n); }; |
| auto a = [bias, accel](double b) { return bias + b / (1. - accel * b); }; |
| double b1 = bias + z1; |
| double b2 = bias - z1; |
| double a1 = a(b1); |
| double a2 = a(b2); |
| auto lo = std::max(cumn(a1), 0); |
| auto hi = std::min(cumn(a2), n - 1); |
| |
| return { point, resample[lo], resample[hi], confidence_level }; |
| } |
| |
| double outlier_variance(Estimate<double> mean, Estimate<double> stddev, int n); /* Declared here, defined elsewhere. Takes the mean and standard-deviation estimates plus an integer n. NOTE(review): the meaning of n and of the returned value is not visible in this header - confirm against the definition. */ |
| |
| struct bootstrap_analysis { /* Bundle of results; the return type of analyse_samples(). */ |
| Estimate<double> mean; /* Estimate object for the mean. */ |
| Estimate<double> standard_deviation; /* Estimate object for the standard deviation. */ |
| double outlier_variance; /* Plain double. NOTE(review): presumably filled from the outlier_variance() function declared in this header - confirm in analyse_samples(). */ |
| }; |
| |
| bootstrap_analysis analyse_samples(double confidence_level, int n_resamples, std::vector<double>::iterator first, std::vector<double>::iterator last); /* Declared here, defined elsewhere. Returns a bootstrap_analysis for the values in [first, last). NOTE(review): presumably forwards n_resamples and confidence_level to resample() and bootstrap() - confirm against the definition. */ |
| } // namespace Detail |
| } // namespace Benchmark |
| } // namespace Catch |
| |
| #endif // TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED |