Refactor analysis for efficiency

This commit is contained in:
Vecna 2024-05-22 16:04:52 -04:00
parent 6b6836dbae
commit 659b8fa16c
1 changed file with 64 additions and 45 deletions

View File

@ -309,35 +309,44 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence;
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
// Evaluate based on negative reports
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
// Model negative reports separately
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
// If we have 0 standard deviation, we need another way to
// evaluate each variable
let bip_test = if bridge_ips_sd > 0.0 {
bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
} else {
// Consider the bridge blocked if its usage dropped by more
// than 1 bin. (Note that the mean is the exact value
// because we had no deviation.)
(bridge_ips_today as f64) < bridge_ips_mean - 8.0
};
let nr_test = if negative_reports_sd > 0.0 {
if negative_reports_sd > 0.0 {
// We use CCDF because more negative reports is worse.
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
return true;
}
} else {
// If the standard deviation is 0, we need another option.
// Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
(negative_reports_today as f64) > negative_reports_mean + 1.0
};
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
return true;
}
}
// Return true if any test concluded the bridge is blocked
bip_test || nr_test
// Evaluate based on bridge stats
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
if bridge_ips_sd > 0.0 {
if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha {
return true;
}
} else {
// If the standard deviation is 0, we need another option.
// Consider the bridge blocked if its usage dropped by more
// than 1 bin. (Note that the mean is the exact value
// because we had no deviation.)
if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 {
return true;
}
}
// If none of the tests concluded that the bridge is blocked,
// return false
false
}
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
@ -357,19 +366,29 @@ impl Analyzer for NormalAnalyzer {
let alpha = 1.0 - confidence;
// Model bridge IPs and positive reports with multivariate
// normal distribution
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
// Model negative reports separately
// Evaluate based on negative reports. It is better to run the
// negative reports test first because the positive test may be
// expensive.
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
if negative_reports_sd > 0.0 {
// We use CCDF because more negative reports is worse.
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
return true;
}
} else {
// Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
return true;
}
}
// If we have 0 standard deviation or a covariance matrix that
// is not positive definite, we need another way to evaluate
// each variable
let positive_test = if mvn.is_ok() {
// Evaluate based on bridge stats and positive reports.
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
if mvn.is_ok() {
let mvn = mvn.unwrap();
// Estimate the CDF by integrating the PDF by hand with step
@ -380,27 +399,27 @@ impl Analyzer for NormalAnalyzer {
cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
}
}
cdf < alpha
if cdf < alpha {
return true;
}
} else {
// Ignore positive reports and compute as in stage 2
self.stage_two(
// If we have 0 standard deviation or a covariance matrix
// that is not positive definite, we need another way to
// evaluate each variable. Ignore positive reports and
// compute as in stage 2
if self.stage_two(
confidence,
bridge_ips,
bridge_ips_today,
negative_reports,
negative_reports_today,
)
};
let nr_test = if negative_reports_sd > 0.0 {
// We use CCDF because more negative reports is worse.
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
} else {
// Consider the bridge blocked if negative reports increase by
// more than 1 after a long static period. (Note that the
// mean is the exact value because we had no deviation.)
(negative_reports_today as f64) > negative_reports_mean + 1.0
) {
return true;
}
};
positive_test || nr_test
// If none of the tests concluded that the bridge is blocked,
// return false
false
}
}