Use statrs functions
This commit is contained in:
parent
11bedfb74a
commit
a8a0983f9e
111
src/analysis.rs
111
src/analysis.rs
|
@ -2,6 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType};
|
||||||
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
|
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
|
||||||
use nalgebra::DVector;
|
use nalgebra::DVector;
|
||||||
use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal};
|
use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal};
|
||||||
|
use statrs::statistics::Statistics;
|
||||||
use std::{
|
use std::{
|
||||||
cmp::min,
|
cmp::min,
|
||||||
collections::{BTreeMap, HashSet},
|
collections::{BTreeMap, HashSet},
|
||||||
|
@ -214,71 +215,6 @@ impl NormalAnalyzer {
|
||||||
scaling_factor,
|
scaling_factor,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn mean(data: &[u32]) -> f64 {
|
|
||||||
let mut sum = 0.0;
|
|
||||||
for count in data {
|
|
||||||
sum += *count as f64;
|
|
||||||
}
|
|
||||||
sum / data.len() as f64
|
|
||||||
}
|
|
||||||
|
|
||||||
fn std_dev(data: &[u32], mean: f64) -> f64 {
|
|
||||||
let mut sum = 0.0;
|
|
||||||
for count in data {
|
|
||||||
sum += (*count as f64 - mean).powi(2);
|
|
||||||
}
|
|
||||||
(sum / data.len() as f64).sqrt()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
|
|
||||||
let mean = Self::mean(data);
|
|
||||||
let std_dev = Self::std_dev(data, mean);
|
|
||||||
(mean, std_dev)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the mean vector and covariance matrix
|
|
||||||
fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
|
|
||||||
let n = data.len();
|
|
||||||
|
|
||||||
// Compute mean vector
|
|
||||||
let mean_vec = {
|
|
||||||
let mut mean_vec = Vec::<f64>::new();
|
|
||||||
for var in data {
|
|
||||||
mean_vec.push(Self::mean(var));
|
|
||||||
}
|
|
||||||
mean_vec
|
|
||||||
};
|
|
||||||
|
|
||||||
// Compute covariance matrix
|
|
||||||
let cov_mat = {
|
|
||||||
let mut cov_mat = Vec::<f64>::new();
|
|
||||||
// We don't need to recompute Syx, but we currently do
|
|
||||||
for i in 0..n {
|
|
||||||
for j in 0..n {
|
|
||||||
cov_mat.push({
|
|
||||||
let var1 = data[i];
|
|
||||||
let var1_mean = mean_vec[i];
|
|
||||||
|
|
||||||
let var2 = data[j];
|
|
||||||
let var2_mean = mean_vec[j];
|
|
||||||
|
|
||||||
assert_eq!(var1.len(), var2.len());
|
|
||||||
|
|
||||||
let mut sum = 0.0;
|
|
||||||
for index in 0..var1.len() {
|
|
||||||
sum +=
|
|
||||||
(var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
|
|
||||||
}
|
|
||||||
sum / (var1.len() - 1) as f64
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cov_mat
|
|
||||||
};
|
|
||||||
|
|
||||||
(mean_vec, cov_mat)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Analyzer for NormalAnalyzer {
|
impl Analyzer for NormalAnalyzer {
|
||||||
|
@ -309,8 +245,16 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|
|
||||||
let alpha = 1.0 - confidence;
|
let alpha = 1.0 - confidence;
|
||||||
|
|
||||||
|
// Convert to f64 for stats
|
||||||
|
let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
|
||||||
|
let negative_reports_f64 = &negative_reports
|
||||||
|
.iter()
|
||||||
|
.map(|n| *n as f64)
|
||||||
|
.collect::<Vec<f64>>();
|
||||||
|
|
||||||
// Evaluate based on negative reports
|
// Evaluate based on negative reports
|
||||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
let negative_reports_mean = negative_reports_f64.mean();
|
||||||
|
let negative_reports_sd = negative_reports_f64.std_dev();
|
||||||
|
|
||||||
// Only use CCDF test if today's numbers are worse than average
|
// Only use CCDF test if today's numbers are worse than average
|
||||||
if (negative_reports_today as f64) > negative_reports_mean {
|
if (negative_reports_today as f64) > negative_reports_mean {
|
||||||
|
@ -332,7 +276,8 @@ impl Analyzer for NormalAnalyzer {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Evaluate based on bridge stats
|
// Evaluate based on bridge stats
|
||||||
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
let bridge_ips_mean = bridge_ips_f64.mean();
|
||||||
|
let bridge_ips_sd = bridge_ips_f64.std_dev();
|
||||||
|
|
||||||
// Only use CDF test if today's numbers are worse than average
|
// Only use CDF test if today's numbers are worse than average
|
||||||
if (bridge_ips_today as f64) < bridge_ips_mean {
|
if (bridge_ips_today as f64) < bridge_ips_mean {
|
||||||
|
@ -374,10 +319,22 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|
|
||||||
let alpha = 1.0 - confidence;
|
let alpha = 1.0 - confidence;
|
||||||
|
|
||||||
|
// Convert to f64 for stats
|
||||||
|
let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
|
||||||
|
let negative_reports_f64 = &negative_reports
|
||||||
|
.iter()
|
||||||
|
.map(|n| *n as f64)
|
||||||
|
.collect::<Vec<f64>>();
|
||||||
|
let positive_reports_f64 = &positive_reports
|
||||||
|
.iter()
|
||||||
|
.map(|n| *n as f64)
|
||||||
|
.collect::<Vec<f64>>();
|
||||||
|
|
||||||
// Evaluate based on negative reports. It is better to compute
|
// Evaluate based on negative reports. It is better to compute
|
||||||
// negative reports test first because the positive test may be
|
// negative reports test first because the positive test may be
|
||||||
// expensive.
|
// expensive.
|
||||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
let negative_reports_mean = negative_reports_f64.mean();
|
||||||
|
let negative_reports_sd = negative_reports_f64.std_dev();
|
||||||
|
|
||||||
// Only use CCDF test if today's numbers are worse than average
|
// Only use CCDF test if today's numbers are worse than average
|
||||||
if (negative_reports_today as f64) > negative_reports_mean {
|
if (negative_reports_today as f64) > negative_reports_mean {
|
||||||
|
@ -398,12 +355,24 @@ impl Analyzer for NormalAnalyzer {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Evaluate based on bridge stats and positive reports.
|
// Evaluate based on bridge stats and positive reports.
|
||||||
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
|
let bridge_ips_mean = bridge_ips_f64.mean();
|
||||||
|
let positive_reports_mean = positive_reports_f64.mean();
|
||||||
|
|
||||||
|
let cov_mat = {
|
||||||
|
let x = bridge_ips_f64;
|
||||||
|
let y = positive_reports_f64;
|
||||||
|
let xx = x.covariance(x);
|
||||||
|
let xy = x.covariance(y);
|
||||||
|
let yy = y.covariance(y);
|
||||||
|
vec![xx, xy, xy, yy]
|
||||||
|
};
|
||||||
|
|
||||||
// Only use CDF test if today's numbers are worse than average
|
// Only use CDF test if today's numbers are worse than average
|
||||||
if (bridge_ips_today as f64) < mean_vec[0] || (positive_reports_today as f64) < mean_vec[1]
|
if (bridge_ips_today as f64) < bridge_ips_mean
|
||||||
|
|| (positive_reports_today as f64) < positive_reports_mean
|
||||||
{
|
{
|
||||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
|
let mvn =
|
||||||
|
MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat);
|
||||||
if mvn.is_ok() {
|
if mvn.is_ok() {
|
||||||
let mvn = mvn.unwrap();
|
let mvn = mvn.unwrap();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue