From b50f40fe8af49397c68367814228cef5f4b46516 Mon Sep 17 00:00:00 2001
From: Vecna
Date: Sat, 4 May 2024 14:26:18 -0400
Subject: [PATCH] Add noise when necessary to build distribution

---
 src/analysis.rs | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/src/analysis.rs b/src/analysis.rs
index e2a59cc..005b756 100644
--- a/src/analysis.rs
+++ b/src/analysis.rs
@@ -1,6 +1,7 @@
 use crate::{BridgeInfo, BridgeInfoType};
 use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
-use nalgebra::DVector;
+use nalgebra::{Cholesky, DMatrix, DVector};
+use rand::Rng;
 use statrs::distribution::{Continuous, MultivariateNormal, Normal};
 use std::{
     cmp::min,
@@ -212,7 +213,9 @@ impl NormalAnalyzer {
     }
 
     // Returns the mean vector, vector of individual standard deviations, and
-    // covariance matrix
+    // covariance matrix. If the standard deviation for a variable is 0 and/or
+    // the covariance matrix is not positive definite, add some noise to the
+    // data and recompute.
     fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>, Vec<f64>) {
         let n = data.len();
 
@@ -267,7 +270,33 @@ impl NormalAnalyzer {
             cov_mat
         };
 
-        (mean_vec, sd_vec, cov_mat)
+        // If any standard deviation is 0 or the covariance matrix is not
+        // positive definite, add some noise and recompute.
+        let mut recompute = false;
+        for sd in &sd_vec {
+            if *sd <= 0.0 {
+                recompute = true;
+            }
+        }
+        if Cholesky::new(DMatrix::from_vec(n, n, cov_mat.clone())).is_none() {
+            recompute = true;
+        }
+
+        if !recompute {
+            (mean_vec, sd_vec, cov_mat)
+        } else {
+            // Add random noise and recompute
+            let mut new_data = vec![vec![0; data[0].len()]; n];
+            let mut rng = rand::thread_rng();
+            for i in 0..n {
+                for j in 0..data[i].len() {
+                    // Add 1 to some randomly selected values
+                    new_data[i][j] = data[i][j] + rng.gen_range(0..=1);
+                }
+            }
+            // Compute stats on modified data
+            Self::stats(&new_data.iter().map(Vec::as_slice).collect::<Vec<_>>())
+        }
     }
 }
 