Stage 2: Model as two distributions, handle 0 standard deviation

This commit is contained in:
Vecna 2024-05-21 17:16:26 -04:00
parent 97d4622cd4
commit 3512adc425
1 changed files with 52 additions and 25 deletions

View File

@ -216,6 +216,28 @@ impl NormalAnalyzer {
}
}
/// Computes the arithmetic mean of the counts in `data`.
///
/// Each count is widened to `f64` before being accumulated. An empty
/// slice yields `NaN`, because the result is `0.0 / 0.0`.
fn mean(data: &[u32]) -> f64 {
    let total = data
        .iter()
        .fold(0.0_f64, |acc, &count| acc + f64::from(count));
    total / data.len() as f64
}
/// Computes the population standard deviation of `data` around `mean`.
///
/// The squared deviations are averaged over `data.len()` (not
/// `data.len() - 1`) before taking the square root. The caller supplies
/// `mean`; it is not recomputed here. An empty slice yields `NaN`.
fn std_dev(data: &[u32], mean: f64) -> f64 {
    let squared_deviations = data
        .iter()
        .map(|&count| (f64::from(count) - mean).powi(2))
        .fold(0.0_f64, |acc, sq| acc + sq);
    let variance = squared_deviations / data.len() as f64;
    variance.sqrt()
}
/// Returns `(mean, standard deviation)` for `data`.
///
/// The mean is computed once and passed to `Self::std_dev`, so the
/// deviation is measured around that same value.
fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
    let average = Self::mean(data);
    (average, Self::std_dev(data, average))
}
// Returns the mean vector, vector of individual standard deviations, and
// covariance matrix. If the standard deviation for a variable is 0 and/or
// the covariance matrix is not positive definite, add some noise to the
@ -318,7 +340,7 @@ impl Analyzer for NormalAnalyzer {
|| f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
}
/// Evaluate invite-only bridge based on last 30 days
/// Evaluate invite-only bridge based on historical data
fn stage_two(
&self,
confidence: f64,
@ -332,30 +354,35 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence;
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
let bridge_ips_mean = mean_vec[0];
let negative_reports_mean = mean_vec[1];
let bridge_ips_sd = sd_vec[0];
let negative_reports_sd = sd_vec[1];
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
/*
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
let pdf = mvn.pdf(&DVector::from_vec(vec![
bridge_ips_today as f64,
negative_reports_today as f64,
]));
*/
// Model each variable with a normal distribution.
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
// Model each variable in isolation. We use 1 - the CDF for
// negative reports because more negative reports is worse.
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
// If we have 0 standard deviation, we need another way to
// evaluate each variable
let bip_test = if bridge_ips_sd > 0.0 {
bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
} else {
// Consider the bridge blocked if its usage dropped by more
// than 1 bin. (Note that the mean is the exact value
// because we had no deviation.)
(bridge_ips_today as f64) < bridge_ips_mean - 8.0
};
let nr_test = if negative_reports_sd > 0.0 {
// We use CCDF because more negative reports is worse.
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
} else {
// Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
(negative_reports_today as f64) > negative_reports_mean + 1.0
};
// For now, just look at each variable in isolation
// TODO: How do we do a multivariate normal CDF?
bip_cdf < alpha || nr_cdf < alpha
// Return true if any test concluded the bridge is blocked
bip_test || nr_test
}
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
@ -393,17 +420,17 @@ impl Analyzer for NormalAnalyzer {
]));
*/
// Model each variable in isolation. We use 1 - the CDF for
// Model each variable in isolation. We use the CCDF for
// negative reports because more negative reports is worse.
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
let nr_ccdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
// For now, just look at each variable in isolation
// TODO: How do we do a multivariate normal CDF?
bip_cdf < alpha || nr_cdf < alpha || pr_cdf < alpha
bip_cdf < alpha || nr_ccdf < alpha || pr_cdf < alpha
}
}