Refactor analysis for efficiency

This commit is contained in:
Vecna 2024-05-22 16:04:52 -04:00
parent 6b6836dbae
commit 659b8fa16c
1 changed file with 64 additions and 45 deletions

View File

@ -309,35 +309,44 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence; let alpha = 1.0 - confidence;
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips); // Evaluate based on negative reports
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
// Model negative reports separately
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
if negative_reports_sd > 0.0 {
// If we have 0 standard deviation, we need another way to
// evaluate each variable
let bip_test = if bridge_ips_sd > 0.0 {
bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
} else {
// Consider the bridge blocked if its usage dropped by more
// than 1 bin. (Note that the mean is the exact value
// because we had no deviation.)
(bridge_ips_today as f64) < bridge_ips_mean - 8.0
};
let nr_test = if negative_reports_sd > 0.0 {
// We use CCDF because more negative reports is worse. // We use CCDF because more negative reports is worse.
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
return true;
}
} else { } else {
// If the standard deviation is 0, we need another option.
// Consider the bridge blocked if negative reports increase by // Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the // more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.) // mean is the exact value because we had no deviation.)
(negative_reports_today as f64) > negative_reports_mean + 1.0 if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
}; return true;
}
}
// Return true if any test concluded the bridge is blocked // Evaluate based on bridge stats
bip_test || nr_test let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
if bridge_ips_sd > 0.0 {
if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha {
return true;
}
} else {
// If the standard deviation is 0, we need another option.
// Consider the bridge blocked if its usage dropped by more
// than 1 bin. (Note that the mean is the exact value
// because we had no deviation.)
if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 {
return true;
}
}
// If none of the tests concluded that the bridge is blocked,
// return false
false
} }
/// Evaluate invite-only bridge with lv3+ users submitting positive reports /// Evaluate invite-only bridge with lv3+ users submitting positive reports
@ -357,19 +366,29 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence; let alpha = 1.0 - confidence;
// Model bridge IPs and positive reports with multivariate // Evaluate based on negative reports. It is better to compute
// normal distribution // negative reports test first because the positive test may be
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]); // expensive.
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
// Model negative reports separately
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
if negative_reports_sd > 0.0 {
// We use CCDF because more negative reports is worse.
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
return true;
}
} else {
// Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
return true;
}
}
// If we have 0 standard deviation or a covariance matrix that // Evaluate based on bridge stats and positive reports.
// is not positive definite, we need another way to evaluate let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
// each variable let mvn = MultivariateNormal::new(mean_vec, cov_mat);
let positive_test = if mvn.is_ok() { if mvn.is_ok() {
let mvn = mvn.unwrap(); let mvn = mvn.unwrap();
// Estimate the CDF by integrating the PDF by hand with step // Estimate the CDF by integrating the PDF by hand with step
@ -380,27 +399,27 @@ impl Analyzer for NormalAnalyzer {
cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64])); cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
} }
} }
cdf < alpha if cdf < alpha {
return true;
}
} else { } else {
// Ignore positive reports and compute as in stage 2 // If we have 0 standard deviation or a covariance matrix
self.stage_two( // that is not positive definite, we need another way to
// evaluate each variable. Ignore positive reports and
// compute as in stage 2
if self.stage_two(
confidence, confidence,
bridge_ips, bridge_ips,
bridge_ips_today, bridge_ips_today,
negative_reports, negative_reports,
negative_reports_today, negative_reports_today,
) ) {
}; return true;
let nr_test = if negative_reports_sd > 0.0 { }
// We use CCDF because more negative reports is worse.
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
} else {
// Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
(negative_reports_today as f64) > negative_reports_mean + 1.0
}; };
positive_test || nr_test // If none of the tests concluded that the bridge is blocked,
// return false
false
} }
} }