From 659b8fa16c8439b0f6dca68057db664d948807b8 Mon Sep 17 00:00:00 2001 From: Vecna Date: Wed, 22 May 2024 16:04:52 -0400 Subject: [PATCH] Refactor analysis for efficiency --- src/analysis.rs | 109 ++++++++++++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/src/analysis.rs b/src/analysis.rs index 9a84de0..c3fc60e 100644 --- a/src/analysis.rs +++ b/src/analysis.rs @@ -309,35 +309,44 @@ impl Analyzer for NormalAnalyzer { let alpha = 1.0 - confidence; - let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips); + // Evaluate based on negative reports let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); - - // Model negative reports separately - let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd); let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); - - // If we have 0 standard deviation, we need another way to - // evaluate each variable - let bip_test = if bridge_ips_sd > 0.0 { - bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha - } else { - // Consider the bridge blocked if its usage dropped by more - // than 1 bin. (Note that the mean is the exact value - // because we had no deviation.) - (bridge_ips_today as f64) < bridge_ips_mean - 8.0 - }; - let nr_test = if negative_reports_sd > 0.0 { + if negative_reports_sd > 0.0 { // We use CCDF because more negative reports is worse. - (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha + if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha { + return true; + } } else { + // If the standard deviation is 0, we need another option. // Consider the bridge blocked negative reports increase by // more than 1 after a long static period. (Note that the // mean is the exact value because we had no deviation.) - (negative_reports_today as f64) > negative_reports_mean + 1.0 - }; + if (negative_reports_today as f64) > negative_reports_mean + 1.0 { + return true; + } + } - // Return true if any test concluded the bridge is blocked - bip_test || nr_test + // Evaluate based on bridge stats + let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips); + let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd); + if bridge_ips_sd > 0.0 { + if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha { + return true; + } + } else { + // If the standard deviation is 0, we need another option. + // Consider the bridge blocked if its usage dropped by more + // than 1 bin. (Note that the mean is the exact value + // because we had no deviation.) + if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 { + return true; + } + } + + // If none of the tests concluded that the bridge is blocked, + // return false + false } /// Evaluate invite-only bridge with lv3+ users submitting positive reports @@ -357,19 +366,29 @@ impl Analyzer for NormalAnalyzer { let alpha = 1.0 - confidence; - // Model bridge IPs and positive reports with multivariate - // normal distribution - let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]); - let mvn = MultivariateNormal::new(mean_vec, cov_mat); - - // Model negative reports separately + // Evaluate based on negative reports. It is better to compute + // negative reports test first because the positive test may be + // expensive. let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); + if negative_reports_sd > 0.0 { + // We use CCDF because more negative reports is worse. + if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha { + return true; + } + } else { + // Consider the bridge blocked negative reports increase by + // more than 1 after a long static period. (Note that the + // mean is the exact value because we had no deviation.) + if (negative_reports_today as f64) > negative_reports_mean + 1.0 { + return true; + } + } - // If we have 0 standard deviation or a covariance matrix that - // is not positive definite, we need another way to evaluate - // each variable - let positive_test = if mvn.is_ok() { + // Evaluate based on bridge stats and positive reports. + let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]); + let mvn = MultivariateNormal::new(mean_vec, cov_mat); + if mvn.is_ok() { let mvn = mvn.unwrap(); // Estimate the CDF by integrating the PDF by hand with step @@ -380,27 +399,27 @@ impl Analyzer for NormalAnalyzer { cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64])); } } - cdf < alpha + if cdf < alpha { + return true; + } } else { - // Ignore positive reports and compute as in stage 2 - self.stage_two( + // If we have 0 standard deviation or a covariance matrix + // that is not positive definite, we need another way to + // evaluate each variable. Ignore positive reports and + // compute as in stage 2 + if self.stage_two( confidence, bridge_ips, bridge_ips_today, negative_reports, negative_reports_today, - ) - }; - let nr_test = if negative_reports_sd > 0.0 { - // We use CCDF because more negative reports is worse. - (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha - } else { - // Consider the bridge blocked negative reports increase by - // more than 1 after a long static period. (Note that the - // mean is the exact value because we had no deviation.) - (negative_reports_today as f64) > negative_reports_mean + 1.0 + ) { + return true; + } }; - positive_test || nr_test + // If none of the tests concluded that the bridge is blocked, + // return false + false } }