Files
core_math/tjp/core/math/Statistics.h
Timothy Prepscius 0807c0286a flatten 20260225
2026-02-25 12:36:47 -05:00

666 lines
9.5 KiB
C++
Executable File

//
// Server.hpp
// TimeSync
//
// Created by Timothy Prepscius on 1/27/17.
// Copyright © 2017 Timothy Prepscius. All rights reserved.
//
#pragma once
#include <cmath>
#include <vector>
#include <numeric>
#include <iostream>
#include <functional>
#include <climits>
#include <algorithm>
#include <deque>
namespace tjp {
namespace core {
namespace math {
struct RunningStatsWeightedAverager
{
typedef double T;
double weight, oneMinusWeight;
T m, s;
bool initialized = false;
RunningStatsWeightedAverager(double weight_ = 0.8) :
m(0),
s(0),
weight(weight_),
oneMinusWeight(1.0 - weight_)
{
clear();
}
void clear()
{
m = 0;
s = 0;
}
void initialize(T x)
{
m = x;
initialized = true;
}
void push(T x)
{
if (!initialized)
initialize(x);
// See Knuth TAOCP vol 2, 3rd edition, page 232
auto newM = weight * m + oneMinusWeight * x;
auto newS = weight * s + oneMinusWeight * std::abs(x - m);
// set up for next iteration
m = newM;
s = newS;
}
double mean() const
{
return m;
}
double error() const
{
return s;
}
};
template<typename T>
class RunningStats
{
public:
RunningStats()
{
clear();
}
void clear()
{
n = 0;
m = 0;
s = 0;
}
void push(T x)
{
n++;
// See Knuth TAOCP vol 2, 3rd edition, page 232
auto newM = m + (x - m) / n;
auto newS = s + (x - m)*(x - newM);
// set up for next iteration
m = newM;
s = newS;
}
int numDataValues() const
{
return n;
}
double mean() const
{
return (n > 0) ? m : 0.0;
}
double variance() const
{
return ((n > 1) ? s / (n - 1) : 0.0);
}
double standardDeviation() const
{
return std::sqrt(variance());
}
private:
int n;
T m, s;
};
template<typename T>
class AccumulatedStats
{
protected:
std::vector<T> samples;
T unresolved;
T m;
T e;
void calculate()
{
m = e = unresolved;
if (samples.empty())
return;
T s = 0;
for (auto &t : samples)
{
s += t;
}
m = s /= samples.size();
if (samples.size() < 2)
return;
T s0 = 0;
for (int i = 0; i < samples.size() / 2; ++i)
{
s0 += samples[i];
}
T s1 = 0;
for (int i = samples.size() / 2; i < samples.size(); ++i)
{
s1 += samples[i];
}
s0 /= samples.size() / 2;
s1 /= samples.size() - samples.size() / 2;
e = s1 - s0;
}
public:
AccumulatedStats(T unresolved)
{
this->unresolved = unresolved;
}
void push(T t)
{
samples.push_back(t);
calculate();
}
void clear()
{
samples.clear();
calculate();
}
T mean()
{
return m;
}
T error()
{
return e;
}
T errorSize()
{
return std::abs(e);
}
};
template<typename R, typename T=R>
R valueOf(const T &t) { return (R)t; }
template<typename R, typename T=R>
R weightOf(const T &t) { return (R)1.0; }
template<typename T>
double stdev_lengthSquared(const T &d)
{
return (double)d * (double)d;
}
template<typename T>
T zero_of()
{
return (T)0.0;
}
template<typename R, typename V>
R mean(const V &v, std::function<R(const typename V::value_type &)> f=valueOf<R, typename V::value_type>)
{
R sum = zero_of<R>();
for (auto &t : v)
{
sum += f(t);
}
auto mean = sum / v.size();
return mean;
}
template<typename R, typename V>
R weighted_mean(const V &v,
std::function<R(const typename V::value_type &)> f=valueOf<R, typename V::value_type>,
std::function<R(const typename V::value_type &)> w=weightOf<R, typename V::value_type>
)
{
R sum = zero_of<R>();
R weight = zero_of<R>();
for (auto &t : v)
{
auto w_ = w(t);
sum += w_ * f(t);
weight += w_;
}
auto mean = sum / weight;
return mean;
}
template<typename R, typename V>
double stdev(const V &v, const R &mean, std::function<R(const typename V::value_type &)> f=valueOf<R, typename V::value_type>)
{
double sum = zero_of<double>();
for (auto &t : v)
{
auto diff = f(t) - mean;
sum += stdev_lengthSquared(diff);
}
return std::sqrt(sum / (double)v.size());
}
template<typename R, typename V>
double stdev(const V &v, std::function<R(const typename V::value_type &)> f=valueOf<R, typename V::value_type>)
{
typedef typename V::value_type T;
T mean = math::mean<T>(v, f);
return stdev(v, mean);
}
template<typename T>
struct MeanStddev
{
T mean;
T stddev;
} ;
template<typename R, typename V>
MeanStddev<R> rejectOutliersMeanStddev(const V &samples, R a, std::function<R(const typename V::value_type &)> f=valueOf<R, typename V::value_type>)
{
R m, e;
auto u = math::mean(samples);
auto s = math::stdev(samples);
// filtered = [e for e in data_bad if (u - 2 * s < e < u + 2 * s)]
std::vector<R> filtered;
for (auto &v : samples)
{
if (u - a * s < v && v < u + a * s)
{
filtered.push_back(v);
}
}
if (filtered.empty())
{
m = e = 0;
return { m, e };
}
return { math::mean(filtered), math::stdev(filtered) };
}
template<typename T>
class RejectOutlierStats
{
protected:
std::vector<T> samples;
double a;
T unresolved;
T m;
T e;
void calculate()
{
if (samples.empty())
{
m = e = unresolved;
return;
}
auto u = math::mean<T>(samples);
auto s = math::stdev<T>(samples);
// filtered = [e for e in data_bad if (u - 2 * s < e < u + 2 * s)]
std::vector<T> filtered;
for (auto &v : samples)
{
if (u - a * s < v && v < u + a * s)
{
filtered.push_back(v);
}
}
if (filtered.empty())
{
m = e = unresolved;
return;
}
auto m = math::mean<T>(filtered);
auto e = math::stdev<T>(filtered);
this->m = m;
this->e = e;
}
public:
RejectOutlierStats(T unresolved, double a = 2.0)
{
this->unresolved = unresolved;
m = e = unresolved;
this->a = a;
}
void push(T t)
{
samples.push_back(t);
calculate();
}
void clear()
{
samples.clear();
calculate();
}
T mean()
{
return m;
}
T error()
{
return e;
}
T errorSize()
{
return std::abs(e);
}
size_t size () const
{
return samples.size();
}
};
template<typename T>
class WindowStats
{
protected:
std::deque<T> samples;
int windowSize;
double a;
T unresolved;
T m;
T e;
T l;
void calculate()
{
m = e = l = unresolved;
if (samples.empty())
return;
auto &window = samples;
typedef double R;
m = math::mean<R>(window);
e = math::stdev<R>(window);
}
public:
WindowStats(T unresolved, int windowSize, double a = 2.0)
{
this->unresolved = unresolved;
l = m = e = unresolved;
this->a = a;
this->windowSize = windowSize;
}
void push(T t)
{
samples.push_front(t);
while (samples.size() > windowSize)
samples.pop_back();
calculate();
}
void clear()
{
samples.clear();
calculate();
}
T mean()
{
return m;
}
T min()
{
return l;
}
T error()
{
return e;
}
T errorSize()
{
return std::abs(e);
}
};
template<typename T>
class RejectOutlierWindowStats
{
protected:
std::deque<T> samples;
int windowSize;
double a;
T unresolved;
T m;
T e;
T l;
void calculate()
{
m = e = l = unresolved;
if (samples.empty())
return;
auto &window = samples;
typedef double R;
R u = math::mean<R>(window);
R s = math::stdev<R>(window);
// filtered = [e for e in data_bad if (u - 2 * s < e < u + 2 * s)]
std::vector<T> filtered;
for (auto &v : window)
{
if (l == unresolved)
l = v;
else
l = std::min(l, v);
if (u - a * s < v && v < u + a * s)
{
filtered.push_back(v);
}
}
if (filtered.empty())
return;
m = math::mean<R>(filtered);
e = math::stdev<R>(filtered);
}
public:
RejectOutlierWindowStats(T unresolved, int windowSize, double a = 2.0)
{
this->unresolved = unresolved;
l = m = e = unresolved;
this->a = a;
this->windowSize = windowSize;
}
void push(T t)
{
samples.push_back(t);
if (samples.size() > windowSize)
samples.pop_front();
calculate();
}
void clear()
{
samples.clear();
calculate();
}
T mean()
{
return m;
}
T min()
{
return l;
}
T error()
{
return e;
}
T errorSize()
{
return std::abs(e);
}
};
template<typename T, typename R>
class RejectOutlierUseBestStatsWindow
{
protected:
typedef std::pair<T, R> Sample;
std::vector<Sample> samples;
int numSamplesToReduce;
int numSamplesToInclude;
double a;
T unresolved;
T m;
T e;
void calculate()
{
m = e = unresolved;
if (samples.empty())
return;
auto numSamplesToReduce = std::min((int)samples.size(), this->numSamplesToReduce == -1 ? INT_MAX : this->numSamplesToReduce);
std::vector<Sample> last(samples.end() - numSamplesToReduce, samples.end());
std::sort(last.begin(), last.end(), [](Sample &l, Sample &r) { return l.second < r.second; });
auto numSamplesToInclude = std::min((int)last.size(), this->numSamplesToInclude);
std::vector<T> best;
for (auto i = 0; i < numSamplesToInclude; ++i)
{
best.push_back(last[i].first);
}
double u = math::mean(best);
double s = math::stdev(best);
// filtered = [e for e in data_bad if (u - 2 * s < e < u + 2 * s)]
std::vector<T> filtered;
for (auto &v : best)
{
if (u - a * s < v && v < u + a * s)
{
filtered.push_back(v);
}
}
if (filtered.empty())
return;
m = math::mean(filtered);
e = math::stdev(filtered);
}
public:
RejectOutlierUseBestStatsWindow(T unresolved, int numSamplesToReduce, int numSamplesToInclude, double a = 2.0)
{
this->unresolved = unresolved;
m = e = unresolved;
this->a = a;
this->numSamplesToReduce = numSamplesToReduce;
this->numSamplesToInclude = numSamplesToInclude;
}
void push(T t, R r)
{
samples.push_back({ t, r });
calculate();
}
void clear()
{
samples.clear();
calculate();
}
T mean()
{
return m;
}
T error()
{
return e;
}
T errorSize()
{
return std::abs(e);
}
};
} // namespace
} // namespace
} // namespace