Use statrs functions

This commit is contained in:
Vecna 2024-05-27 18:55:37 -04:00
parent 11bedfb74a
commit a8a0983f9e
1 changed files with 40 additions and 71 deletions

View File

@ -2,6 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType};
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
use nalgebra::DVector;
use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal};
use statrs::statistics::Statistics;
use std::{
cmp::min,
collections::{BTreeMap, HashSet},
@ -214,71 +215,6 @@ impl NormalAnalyzer {
scaling_factor,
}
}
fn mean(data: &[u32]) -> f64 {
let mut sum = 0.0;
for count in data {
sum += *count as f64;
}
sum / data.len() as f64
}
fn std_dev(data: &[u32], mean: f64) -> f64 {
let mut sum = 0.0;
for count in data {
sum += (*count as f64 - mean).powi(2);
}
(sum / data.len() as f64).sqrt()
}
fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
let mean = Self::mean(data);
let std_dev = Self::std_dev(data, mean);
(mean, std_dev)
}
// Returns the mean vector and covariance matrix
fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
let n = data.len();
// Compute mean vector
let mean_vec = {
let mut mean_vec = Vec::<f64>::new();
for var in data {
mean_vec.push(Self::mean(var));
}
mean_vec
};
// Compute covariance matrix
let cov_mat = {
let mut cov_mat = Vec::<f64>::new();
// We don't need to recompute Syx, but we currently do
for i in 0..n {
for j in 0..n {
cov_mat.push({
let var1 = data[i];
let var1_mean = mean_vec[i];
let var2 = data[j];
let var2_mean = mean_vec[j];
assert_eq!(var1.len(), var2.len());
let mut sum = 0.0;
for index in 0..var1.len() {
sum +=
(var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
}
sum / (var1.len() - 1) as f64
});
}
}
cov_mat
};
(mean_vec, cov_mat)
}
}
impl Analyzer for NormalAnalyzer {
@ -309,8 +245,16 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence;
// Convert to f64 for stats
let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
let negative_reports_f64 = &negative_reports
.iter()
.map(|n| *n as f64)
.collect::<Vec<f64>>();
// Evaluate based on negative reports
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
let negative_reports_mean = negative_reports_f64.mean();
let negative_reports_sd = negative_reports_f64.std_dev();
// Only use CCDF test if today's numbers are worse than average
if (negative_reports_today as f64) > negative_reports_mean {
@ -332,7 +276,8 @@ impl Analyzer for NormalAnalyzer {
}
// Evaluate based on bridge stats
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
let bridge_ips_mean = bridge_ips_f64.mean();
let bridge_ips_sd = bridge_ips_f64.std_dev();
// Only use CDF test if today's numbers are worse than average
if (bridge_ips_today as f64) < bridge_ips_mean {
@ -374,10 +319,22 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence;
// Convert to f64 for stats
let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
let negative_reports_f64 = &negative_reports
.iter()
.map(|n| *n as f64)
.collect::<Vec<f64>>();
let positive_reports_f64 = &positive_reports
.iter()
.map(|n| *n as f64)
.collect::<Vec<f64>>();
// Evaluate based on negative reports. It is better to compute
// negative reports test first because the positive test may be
// expensive.
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
let negative_reports_mean = negative_reports_f64.mean();
let negative_reports_sd = negative_reports_f64.std_dev();
// Only use CCDF test if today's numbers are worse than average
if (negative_reports_today as f64) > negative_reports_mean {
@ -398,12 +355,24 @@ impl Analyzer for NormalAnalyzer {
}
// Evaluate based on bridge stats and positive reports.
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
let bridge_ips_mean = bridge_ips_f64.mean();
let positive_reports_mean = positive_reports_f64.mean();
let cov_mat = {
let x = bridge_ips_f64;
let y = positive_reports_f64;
let xx = x.covariance(x);
let xy = x.covariance(y);
let yy = y.covariance(y);
vec![xx, xy, xy, yy]
};
// Only use CDF test if today's numbers are worse than average
if (bridge_ips_today as f64) < mean_vec[0] || (positive_reports_today as f64) < mean_vec[1]
if (bridge_ips_today as f64) < bridge_ips_mean
|| (positive_reports_today as f64) < positive_reports_mean
{
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
let mvn =
MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat);
if mvn.is_ok() {
let mvn = mvn.unwrap();