Only use CDF tests if today's numbers are worse than average

This commit is contained in:
Vecna 2024-05-27 17:47:02 -04:00
parent 788f1f7f21
commit 11bedfb74a
1 changed file with 81 additions and 64 deletions

View File

@ -311,36 +311,44 @@ impl Analyzer for NormalAnalyzer {
// Evaluate based on negative reports // Evaluate based on negative reports
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
if negative_reports_sd > 0.0 { // Only use CCDF test if today's numbers are worse than average
// We use CCDF because more negative reports is worse. if (negative_reports_today as f64) > negative_reports_mean {
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha { let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
return true; if negative_reports_sd > 0.0 {
} // We use CCDF because more negative reports is worse.
} else { if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
// If the standard deviation is 0, we need another option. return true;
// Consider the bridge blocked if negative reports increase by }
// more than 1 after a long static period. (Note that the } else {
// mean is the exact value because we had no deviation.) // If the standard deviation is 0, we need another option.
if (negative_reports_today as f64) > negative_reports_mean + 1.0 { // Consider the bridge blocked if negative reports increase by
return true; // more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
return true;
}
} }
} }
// Evaluate based on bridge stats // Evaluate based on bridge stats
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips); let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
if bridge_ips_sd > 0.0 { // Only use CDF test if today's numbers are worse than average
if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha { if (bridge_ips_today as f64) < bridge_ips_mean {
return true; let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
} if bridge_ips_sd > 0.0 {
} else { if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha {
// If the standard deviation is 0, we need another option. return true;
// Consider the bridge blocked if its usage dropped by more }
// than 1 bin. (Note that the mean is the exact value } else {
// because we had no deviation.) // If the standard deviation is 0, we need another option.
if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 { // Consider the bridge blocked if its usage dropped by more
return true; // than 1 bin. (Note that the mean is the exact value
// because we had no deviation.)
if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 {
return true;
}
} }
} }
@ -370,53 +378,62 @@ impl Analyzer for NormalAnalyzer {
// negative reports test first because the positive test may be // negative reports test first because the positive test may be
// expensive. // expensive.
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
if negative_reports_sd > 0.0 { // Only use CCDF test if today's numbers are worse than average
// We use CCDF because more negative reports is worse. if (negative_reports_today as f64) > negative_reports_mean {
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha { let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
return true; if negative_reports_sd > 0.0 {
} // We use CCDF because more negative reports is worse.
} else { if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
// Consider the bridge blocked if negative reports increase by return true;
// more than 1 after a long static period. (Note that the }
// mean is the exact value because we had no deviation.) } else {
if (negative_reports_today as f64) > negative_reports_mean + 1.0 { // Consider the bridge blocked if negative reports increase by
return true; // more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
return true;
}
} }
} }
// Evaluate based on bridge stats and positive reports. // Evaluate based on bridge stats and positive reports.
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]); let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
if mvn.is_ok() {
let mvn = mvn.unwrap();
// Estimate the CDF by integrating the PDF by hand with step // Only use CDF test if today's numbers are worse than average
// size 1 if (bridge_ips_today as f64) < mean_vec[0] || (positive_reports_today as f64) < mean_vec[1]
let mut cdf = 0.0; {
for bip in 0..bridge_ips_today { let mvn = MultivariateNormal::new(mean_vec, cov_mat);
for pr in 0..positive_reports_today { if mvn.is_ok() {
cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64])); let mvn = mvn.unwrap();
// Estimate the CDF by integrating the PDF by hand with step
// size 1
let mut cdf = 0.0;
for bip in 0..bridge_ips_today {
for pr in 0..positive_reports_today {
cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
}
}
if cdf < alpha {
return true;
}
} else {
// If we have 0 standard deviation or a covariance matrix
// that is not positive definite, we need another way to
// evaluate each variable. Ignore positive reports and
// compute as in stage 2
if self.stage_two(
confidence,
bridge_ips,
bridge_ips_today,
negative_reports,
negative_reports_today,
) {
return true;
} }
} }
if cdf < alpha { }
return true;
}
} else {
// If we have 0 standard deviation or a covariance matrix
// that is not positive definite, we need another way to
// evaluate each variable. Ignore positive reports and
// compute as in stage 2
if self.stage_two(
confidence,
bridge_ips,
bridge_ips_today,
negative_reports,
negative_reports_today,
) {
return true;
}
};
// If none of the tests concluded that the bridge is blocked, // If none of the tests concluded that the bridge is blocked,
// return false // return false