From a8a0983f9e41c7ada5392022ffb3183b3b8f425d Mon Sep 17 00:00:00 2001 From: Vecna Date: Mon, 27 May 2024 18:55:37 -0400 Subject: [PATCH] Use statrs functions --- src/analysis.rs | 111 +++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 71 deletions(-) diff --git a/src/analysis.rs b/src/analysis.rs index 3776727..77f472f 100644 --- a/src/analysis.rs +++ b/src/analysis.rs @@ -2,6 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType}; use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL; use nalgebra::DVector; use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal}; +use statrs::statistics::Statistics; use std::{ cmp::min, collections::{BTreeMap, HashSet}, @@ -214,71 +215,6 @@ impl NormalAnalyzer { scaling_factor, } } - - fn mean(data: &[u32]) -> f64 { - let mut sum = 0.0; - for count in data { - sum += *count as f64; - } - sum / data.len() as f64 - } - - fn std_dev(data: &[u32], mean: f64) -> f64 { - let mut sum = 0.0; - for count in data { - sum += (*count as f64 - mean).powi(2); - } - (sum / data.len() as f64).sqrt() - } - - fn mean_and_std_dev(data: &[u32]) -> (f64, f64) { - let mean = Self::mean(data); - let std_dev = Self::std_dev(data, mean); - (mean, std_dev) - } - - // Returns the mean vector and covariance matrix - fn stats(data: &[&[u32]]) -> (Vec, Vec) { - let n = data.len(); - - // Compute mean vector - let mean_vec = { - let mut mean_vec = Vec::::new(); - for var in data { - mean_vec.push(Self::mean(var)); - } - mean_vec - }; - - // Compute covariance matrix - let cov_mat = { - let mut cov_mat = Vec::::new(); - // We don't need to recompute Syx, but we currently do - for i in 0..n { - for j in 0..n { - cov_mat.push({ - let var1 = data[i]; - let var1_mean = mean_vec[i]; - - let var2 = data[j]; - let var2_mean = mean_vec[j]; - - assert_eq!(var1.len(), var2.len()); - - let mut sum = 0.0; - for index in 0..var1.len() { - sum += - (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean); - } - sum / (var1.len() - 1) as f64 - }); - } - } - cov_mat - }; - - (mean_vec, cov_mat) - } } impl Analyzer for NormalAnalyzer { @@ -309,8 +245,16 @@ impl Analyzer for NormalAnalyzer { let alpha = 1.0 - confidence; + // Convert to f64 for stats + let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::>(); + let negative_reports_f64 = &negative_reports + .iter() + .map(|n| *n as f64) + .collect::>(); + // Evaluate based on negative reports - let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); + let negative_reports_mean = negative_reports_f64.mean(); + let negative_reports_sd = negative_reports_f64.std_dev(); // Only use CCDF test if today's numbers are worse than average if (negative_reports_today as f64) > negative_reports_mean { @@ -332,7 +276,8 @@ impl Analyzer for NormalAnalyzer { } // Evaluate based on bridge stats - let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips); + let bridge_ips_mean = bridge_ips_f64.mean(); + let bridge_ips_sd = bridge_ips_f64.std_dev(); // Only use CDF test if today's numbers are worse than average if (bridge_ips_today as f64) < bridge_ips_mean { @@ -374,10 +319,22 @@ impl Analyzer for NormalAnalyzer { let alpha = 1.0 - confidence; + // Convert to f64 for stats + let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::>(); + let negative_reports_f64 = &negative_reports + .iter() + .map(|n| *n as f64) + .collect::>(); + let positive_reports_f64 = &positive_reports + .iter() + .map(|n| *n as f64) + .collect::>(); + // Evaluate based on negative reports. It is better to compute // negative reports test first because the positive test may be // expensive. - let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); + let negative_reports_mean = negative_reports_f64.mean(); + let negative_reports_sd = negative_reports_f64.std_dev(); // Only use CCDF test if today's numbers are worse than average if (negative_reports_today as f64) > negative_reports_mean { @@ -398,12 +355,24 @@ impl Analyzer for NormalAnalyzer { } // Evaluate based on bridge stats and positive reports. - let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]); + let bridge_ips_mean = bridge_ips_f64.mean(); + let positive_reports_mean = positive_reports_f64.mean(); + + let cov_mat = { + let x = bridge_ips_f64; + let y = positive_reports_f64; + let xx = x.covariance(x); + let xy = x.covariance(y); + let yy = y.covariance(y); + vec![xx, xy, xy, yy] + }; // Only use CDF test if today's numbers are worse than average - if (bridge_ips_today as f64) < mean_vec[0] || (positive_reports_today as f64) < mean_vec[1] + if (bridge_ips_today as f64) < bridge_ips_mean + || (positive_reports_today as f64) < positive_reports_mean { - let mvn = MultivariateNormal::new(mean_vec, cov_mat); + let mvn = + MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat); if mvn.is_ok() { let mvn = mvn.unwrap();