Stage 2: Model as two distributions, handle 0 standard deviation
This commit is contained in:
parent
97d4622cd4
commit
3512adc425
|
@ -216,6 +216,28 @@ impl NormalAnalyzer {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the arithmetic mean of the counts in `data`.
///
/// The values are accumulated in slice order as `f64`. An empty slice
/// produces `0.0 / 0.0`, i.e. NaN.
fn mean(data: &[u32]) -> f64 {
    // u32 -> f64 is lossless, so `f64::from` matches the plain cast exactly.
    let total = data.iter().fold(0.0, |acc, &value| acc + f64::from(value));
    let n = data.len() as f64;
    total / n
}
|
||||
|
||||
/// Returns the population standard deviation of `data` around `mean`.
///
/// The squared deviations are summed in slice order and divided by the
/// number of elements (not `n - 1`) before taking the square root. An
/// empty slice produces `sqrt(0.0 / 0.0)`, i.e. NaN.
fn std_dev(data: &[u32], mean: f64) -> f64 {
    let squared_deviations = data.iter().fold(0.0, |acc, &value| {
        let delta = f64::from(value) - mean;
        acc + delta.powi(2)
    });
    let variance = squared_deviations / data.len() as f64;
    variance.sqrt()
}
|
||||
|
||||
fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
|
||||
let mean = Self::mean(data);
|
||||
let std = Self::std_dev(data, mean);
|
||||
(mean, std)
|
||||
}
|
||||
|
||||
// Returns the mean vector, vector of individual standard deviations, and
|
||||
// covariance matrix. If the standard deviation for a variable is 0 and/or
|
||||
// the covariance matrix is not positive definite, add some noise to the
|
||||
|
@ -318,7 +340,7 @@ impl Analyzer for NormalAnalyzer {
|
|||
|| f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
|
||||
}
|
||||
|
||||
/// Evaluate invite-only bridge based on last 30 days
|
||||
/// Evaluate invite-only bridge based on historical data
|
||||
fn stage_two(
|
||||
&self,
|
||||
confidence: f64,
|
||||
|
@ -332,30 +354,35 @@ impl Analyzer for NormalAnalyzer {
|
|||
|
||||
let alpha = 1.0 - confidence;
|
||||
|
||||
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
|
||||
let bridge_ips_mean = mean_vec[0];
|
||||
let negative_reports_mean = mean_vec[1];
|
||||
let bridge_ips_sd = sd_vec[0];
|
||||
let negative_reports_sd = sd_vec[1];
|
||||
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
||||
|
||||
/*
|
||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
||||
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
||||
bridge_ips_today as f64,
|
||||
negative_reports_today as f64,
|
||||
]));
|
||||
*/
|
||||
// Model each variable with a normal distribution.
|
||||
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
|
||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
||||
|
||||
// Model each variable in isolation. We use 1 - the CDF for
|
||||
// negative reports because more negative reports is worse.
|
||||
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
||||
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
||||
// If we have 0 standard deviation, we need another way to
|
||||
// evaluate each variable
|
||||
let bip_test = if bridge_ips_sd > 0.0 {
|
||||
bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
|
||||
} else {
|
||||
// Consider the bridge blocked if its usage dropped by more
|
||||
// than 1 bin. (Note that the mean is the exact value
|
||||
// because we had no deviation.)
|
||||
(bridge_ips_today as f64) < bridge_ips_mean - 8.0
|
||||
};
|
||||
let nr_test = if negative_reports_sd > 0.0 {
|
||||
// We use CCDF because more negative reports is worse.
|
||||
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
|
||||
} else {
|
||||
// Consider the bridge blocked if negative reports increase by
|
||||
// more than 1 after a long static period. (Note that the
|
||||
// mean is the exact value because we had no deviation.)
|
||||
(negative_reports_today as f64) > negative_reports_mean + 1.0
|
||||
};
|
||||
|
||||
// For now, just look at each variable in isolation
|
||||
// TODO: How do we do a multivariate normal CDF?
|
||||
bip_cdf < alpha || nr_cdf < alpha
|
||||
// Return true if any test concluded the bridge is blocked
|
||||
bip_test || nr_test
|
||||
}
|
||||
|
||||
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
||||
|
@ -393,17 +420,17 @@ impl Analyzer for NormalAnalyzer {
|
|||
]));
|
||||
*/
|
||||
|
||||
// Model each variable in isolation. We use 1 - the CDF for
|
||||
// Model each variable in isolation. We use the CCDF for
|
||||
// negative reports because more negative reports is worse.
|
||||
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
||||
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
||||
let nr_ccdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
||||
let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
|
||||
let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
|
||||
|
||||
// For now, just look at each variable in isolation
|
||||
// TODO: How do we do a multivariate normal CDF?
|
||||
bip_cdf < alpha || nr_cdf < alpha || pr_cdf < alpha
|
||||
bip_cdf < alpha || nr_ccdf < alpha || pr_cdf < alpha
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue