Stage 2: Model as two distributions, handle 0 standard deviation
This commit is contained in:
parent
97d4622cd4
commit
3512adc425
|
@ -216,6 +216,28 @@ impl NormalAnalyzer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Arithmetic mean of the counts in `data`, computed in `f64`.
///
/// Each `u32` is widened losslessly to `f64` before being accumulated.
/// An empty slice divides zero by zero, so the result is NaN.
fn mean(data: &[u32]) -> f64 {
    let total = data
        .iter()
        .fold(0.0_f64, |acc, &count| acc + f64::from(count));
    total / data.len() as f64
}
|
||||||
|
|
||||||
|
/// Population standard deviation of `data` around the supplied `mean`.
///
/// The squared deviations are summed and divided by `data.len()` (not
/// `len - 1`) before taking the square root. An empty slice divides zero
/// by zero, so the result is NaN.
fn std_dev(data: &[u32], mean: f64) -> f64 {
    let squared_error = data.iter().fold(0.0_f64, |acc, &count| {
        let delta = f64::from(count) - mean;
        acc + delta.powi(2)
    });
    (squared_error / data.len() as f64).sqrt()
}
|
||||||
|
|
||||||
|
fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
|
||||||
|
let mean = Self::mean(data);
|
||||||
|
let std = Self::std_dev(data, mean);
|
||||||
|
(mean, std)
|
||||||
|
}
|
||||||
|
|
||||||
// Returns the mean vector, vector of individual standard deviations, and
|
// Returns the mean vector, vector of individual standard deviations, and
|
||||||
// covariance matrix. If the standard deviation for a variable is 0 and/or
|
// covariance matrix. If the standard deviation for a variable is 0 and/or
|
||||||
// the covariance matrix is not positive definite, add some noise to the
|
// the covariance matrix is not positive definite, add some noise to the
|
||||||
|
@ -318,7 +340,7 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|| f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
|
|| f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Evaluate invite-only bridge based on last 30 days
|
/// Evaluate invite-only bridge based on historical data
|
||||||
fn stage_two(
|
fn stage_two(
|
||||||
&self,
|
&self,
|
||||||
confidence: f64,
|
confidence: f64,
|
||||||
|
@ -332,30 +354,35 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|
|
||||||
let alpha = 1.0 - confidence;
|
let alpha = 1.0 - confidence;
|
||||||
|
|
||||||
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
|
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
||||||
let bridge_ips_mean = mean_vec[0];
|
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
||||||
let negative_reports_mean = mean_vec[1];
|
|
||||||
let bridge_ips_sd = sd_vec[0];
|
|
||||||
let negative_reports_sd = sd_vec[1];
|
|
||||||
|
|
||||||
/*
|
// Model each variable with a normal distribution.
|
||||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
|
||||||
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
||||||
bridge_ips_today as f64,
|
|
||||||
negative_reports_today as f64,
|
|
||||||
]));
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Model each variable in isolation. We use 1 - the CDF for
|
// If we have 0 standard deviation, we need another way to
|
||||||
// negative reports because more negative reports is worse.
|
// evaluate each variable
|
||||||
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
let bip_test = if bridge_ips_sd > 0.0 {
|
||||||
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
|
||||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
} else {
|
||||||
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
// Consider the bridge blocked if its usage dropped by more
|
||||||
|
// than 1 bin. (Note that the mean is the exact value
|
||||||
|
// because we had no deviation.)
|
||||||
|
(bridge_ips_today as f64) < bridge_ips_mean - 8.0
|
||||||
|
};
|
||||||
|
let nr_test = if negative_reports_sd > 0.0 {
|
||||||
|
// We use CCDF because more negative reports is worse.
|
||||||
|
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
|
||||||
|
} else {
|
||||||
|
// Consider the bridge blocked if negative reports increase by
|
||||||
|
// more than 1 after a long static period. (Note that the
|
||||||
|
// mean is the exact value because we had no deviation.)
|
||||||
|
(negative_reports_today as f64) > negative_reports_mean + 1.0
|
||||||
|
};
|
||||||
|
|
||||||
// For now, just look at each variable in isolation
|
// Return true if any test concluded the bridge is blocked
|
||||||
// TODO: How do we do a multivariate normal CDF?
|
bip_test || nr_test
|
||||||
bip_cdf < alpha || nr_cdf < alpha
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
||||||
|
@ -393,17 +420,17 @@ impl Analyzer for NormalAnalyzer {
|
||||||
]));
|
]));
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Model each variable in isolation. We use 1 - the CDF for
|
// Model each variable in isolation. We use the CCDF for
|
||||||
// negative reports because more negative reports is worse.
|
// negative reports because more negative reports is worse.
|
||||||
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
||||||
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
||||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||||
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
let nr_ccdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
||||||
let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
|
let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
|
||||||
let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
|
let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
|
||||||
|
|
||||||
// For now, just look at each variable in isolation
|
// For now, just look at each variable in isolation
|
||||||
// TODO: How do we do a multivariate normal CDF?
|
// TODO: How do we do a multivariate normal CDF?
|
||||||
bip_cdf < alpha || nr_cdf < alpha || pr_cdf < alpha
|
bip_cdf < alpha || nr_ccdf < alpha || pr_cdf < alpha
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue