Use statrs functions
This commit is contained in:
parent
11bedfb74a
commit
a8a0983f9e
111
src/analysis.rs
111
src/analysis.rs
|
@ -2,6 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType};
|
|||
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
|
||||
use nalgebra::DVector;
|
||||
use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal};
|
||||
use statrs::statistics::Statistics;
|
||||
use std::{
|
||||
cmp::min,
|
||||
collections::{BTreeMap, HashSet},
|
||||
|
@ -214,71 +215,6 @@ impl NormalAnalyzer {
|
|||
scaling_factor,
|
||||
}
|
||||
}
|
||||
|
||||
fn mean(data: &[u32]) -> f64 {
|
||||
let mut sum = 0.0;
|
||||
for count in data {
|
||||
sum += *count as f64;
|
||||
}
|
||||
sum / data.len() as f64
|
||||
}
|
||||
|
||||
fn std_dev(data: &[u32], mean: f64) -> f64 {
|
||||
let mut sum = 0.0;
|
||||
for count in data {
|
||||
sum += (*count as f64 - mean).powi(2);
|
||||
}
|
||||
(sum / data.len() as f64).sqrt()
|
||||
}
|
||||
|
||||
fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
|
||||
let mean = Self::mean(data);
|
||||
let std_dev = Self::std_dev(data, mean);
|
||||
(mean, std_dev)
|
||||
}
|
||||
|
||||
// Returns the mean vector and covariance matrix
|
||||
fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
|
||||
let n = data.len();
|
||||
|
||||
// Compute mean vector
|
||||
let mean_vec = {
|
||||
let mut mean_vec = Vec::<f64>::new();
|
||||
for var in data {
|
||||
mean_vec.push(Self::mean(var));
|
||||
}
|
||||
mean_vec
|
||||
};
|
||||
|
||||
// Compute covariance matrix
|
||||
let cov_mat = {
|
||||
let mut cov_mat = Vec::<f64>::new();
|
||||
// We don't need to recompute Syx, but we currently do
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
cov_mat.push({
|
||||
let var1 = data[i];
|
||||
let var1_mean = mean_vec[i];
|
||||
|
||||
let var2 = data[j];
|
||||
let var2_mean = mean_vec[j];
|
||||
|
||||
assert_eq!(var1.len(), var2.len());
|
||||
|
||||
let mut sum = 0.0;
|
||||
for index in 0..var1.len() {
|
||||
sum +=
|
||||
(var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
|
||||
}
|
||||
sum / (var1.len() - 1) as f64
|
||||
});
|
||||
}
|
||||
}
|
||||
cov_mat
|
||||
};
|
||||
|
||||
(mean_vec, cov_mat)
|
||||
}
|
||||
}
|
||||
|
||||
impl Analyzer for NormalAnalyzer {
|
||||
|
@ -309,8 +245,16 @@ impl Analyzer for NormalAnalyzer {
|
|||
|
||||
let alpha = 1.0 - confidence;
|
||||
|
||||
// Convert to f64 for stats
|
||||
let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
|
||||
let negative_reports_f64 = &negative_reports
|
||||
.iter()
|
||||
.map(|n| *n as f64)
|
||||
.collect::<Vec<f64>>();
|
||||
|
||||
// Evaluate based on negative reports
|
||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
||||
let negative_reports_mean = negative_reports_f64.mean();
|
||||
let negative_reports_sd = negative_reports_f64.std_dev();
|
||||
|
||||
// Only use CCDF test if today's numbers are worse than average
|
||||
if (negative_reports_today as f64) > negative_reports_mean {
|
||||
|
@ -332,7 +276,8 @@ impl Analyzer for NormalAnalyzer {
|
|||
}
|
||||
|
||||
// Evaluate based on bridge stats
|
||||
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
||||
let bridge_ips_mean = bridge_ips_f64.mean();
|
||||
let bridge_ips_sd = bridge_ips_f64.std_dev();
|
||||
|
||||
// Only use CDF test if today's numbers are worse than average
|
||||
if (bridge_ips_today as f64) < bridge_ips_mean {
|
||||
|
@ -374,10 +319,22 @@ impl Analyzer for NormalAnalyzer {
|
|||
|
||||
let alpha = 1.0 - confidence;
|
||||
|
||||
// Convert to f64 for stats
|
||||
let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
|
||||
let negative_reports_f64 = &negative_reports
|
||||
.iter()
|
||||
.map(|n| *n as f64)
|
||||
.collect::<Vec<f64>>();
|
||||
let positive_reports_f64 = &positive_reports
|
||||
.iter()
|
||||
.map(|n| *n as f64)
|
||||
.collect::<Vec<f64>>();
|
||||
|
||||
// Evaluate based on negative reports. It is better to compute
|
||||
// negative reports test first because the positive test may be
|
||||
// expensive.
|
||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
||||
let negative_reports_mean = negative_reports_f64.mean();
|
||||
let negative_reports_sd = negative_reports_f64.std_dev();
|
||||
|
||||
// Only use CCDF test if today's numbers are worse than average
|
||||
if (negative_reports_today as f64) > negative_reports_mean {
|
||||
|
@ -398,12 +355,24 @@ impl Analyzer for NormalAnalyzer {
|
|||
}
|
||||
|
||||
// Evaluate based on bridge stats and positive reports.
|
||||
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
|
||||
let bridge_ips_mean = bridge_ips_f64.mean();
|
||||
let positive_reports_mean = positive_reports_f64.mean();
|
||||
|
||||
let cov_mat = {
|
||||
let x = bridge_ips_f64;
|
||||
let y = positive_reports_f64;
|
||||
let xx = x.covariance(x);
|
||||
let xy = x.covariance(y);
|
||||
let yy = y.covariance(y);
|
||||
vec![xx, xy, xy, yy]
|
||||
};
|
||||
|
||||
// Only use CDF test if today's numbers are worse than average
|
||||
if (bridge_ips_today as f64) < mean_vec[0] || (positive_reports_today as f64) < mean_vec[1]
|
||||
if (bridge_ips_today as f64) < bridge_ips_mean
|
||||
|| (positive_reports_today as f64) < positive_reports_mean
|
||||
{
|
||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
|
||||
let mvn =
|
||||
MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat);
|
||||
if mvn.is_ok() {
|
||||
let mvn = mvn.unwrap();
|
||||
|
||||
|
|
Loading…
Reference in New Issue