Refactor analysis for efficiency
This commit is contained in:
parent
6b6836dbae
commit
659b8fa16c
109
src/analysis.rs
109
src/analysis.rs
|
@ -309,35 +309,44 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|
|
||||||
let alpha = 1.0 - confidence;
|
let alpha = 1.0 - confidence;
|
||||||
|
|
||||||
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
// Evaluate based on negative reports
|
||||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
||||||
|
|
||||||
// Model negative reports separately
|
|
||||||
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
|
|
||||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
||||||
|
if negative_reports_sd > 0.0 {
|
||||||
// If we have 0 standard deviation, we need another way to
|
|
||||||
// evaluate each variable
|
|
||||||
let bip_test = if bridge_ips_sd > 0.0 {
|
|
||||||
bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
|
|
||||||
} else {
|
|
||||||
// Consider the bridge blocked if its usage dropped by more
|
|
||||||
// than 1 bin. (Note that the mean is the exact value
|
|
||||||
// because we had no deviation.)
|
|
||||||
(bridge_ips_today as f64) < bridge_ips_mean - 8.0
|
|
||||||
};
|
|
||||||
let nr_test = if negative_reports_sd > 0.0 {
|
|
||||||
// We use CCDF because more negative reports is worse.
|
// We use CCDF because more negative reports is worse.
|
||||||
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
|
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// If the standard deviation is 0, we need another option.
|
||||||
// Consider the bridge blocked if negative reports increase by
|
// Consider the bridge blocked if negative reports increase by
|
||||||
// more than 1 after a long static period. (Note that the
|
// more than 1 after a long static period. (Note that the
|
||||||
// mean is the exact value because we had no deviation.)
|
// mean is the exact value because we had no deviation.)
|
||||||
(negative_reports_today as f64) > negative_reports_mean + 1.0
|
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
|
||||||
};
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Return true if any test concluded the bridge is blocked
|
// Evaluate based on bridge stats
|
||||||
bip_test || nr_test
|
let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
||||||
|
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
|
||||||
|
if bridge_ips_sd > 0.0 {
|
||||||
|
if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If the standard deviation is 0, we need another option.
|
||||||
|
// Consider the bridge blocked if its usage dropped by more
|
||||||
|
// than 1 bin. (Note that the mean is the exact value
|
||||||
|
// because we had no deviation.)
|
||||||
|
if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If none of the tests concluded that the bridge is blocked,
|
||||||
|
// return false
|
||||||
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
||||||
|
@ -357,19 +366,29 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|
|
||||||
let alpha = 1.0 - confidence;
|
let alpha = 1.0 - confidence;
|
||||||
|
|
||||||
// Model bridge IPs and positive reports with multivariate
|
// Evaluate based on negative reports. It is better to compute
|
||||||
// normal distribution
|
// negative reports test first because the positive test may be
|
||||||
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
|
// expensive.
|
||||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
|
|
||||||
|
|
||||||
// Model negative reports separately
|
|
||||||
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
||||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
||||||
|
if negative_reports_sd > 0.0 {
|
||||||
|
// We use CCDF because more negative reports is worse.
|
||||||
|
if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Consider the bridge blocked if negative reports increase by
|
||||||
|
// more than 1 after a long static period. (Note that the
|
||||||
|
// mean is the exact value because we had no deviation.)
|
||||||
|
if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If we have 0 standard deviation or a covariance matrix that
|
// Evaluate based on bridge stats and positive reports.
|
||||||
// is not positive definite, we need another way to evaluate
|
let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
|
||||||
// each variable
|
let mvn = MultivariateNormal::new(mean_vec, cov_mat);
|
||||||
let positive_test = if mvn.is_ok() {
|
if mvn.is_ok() {
|
||||||
let mvn = mvn.unwrap();
|
let mvn = mvn.unwrap();
|
||||||
|
|
||||||
// Estimate the CDF by integrating the PDF by hand with step
|
// Estimate the CDF by integrating the PDF by hand with step
|
||||||
|
@ -380,27 +399,27 @@ impl Analyzer for NormalAnalyzer {
|
||||||
cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
|
cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cdf < alpha
|
if cdf < alpha {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Ignore positive reports and compute as in stage 2
|
// If we have 0 standard deviation or a covariance matrix
|
||||||
self.stage_two(
|
// that is not positive definite, we need another way to
|
||||||
|
// evaluate each variable. Ignore positive reports and
|
||||||
|
// compute as in stage 2
|
||||||
|
if self.stage_two(
|
||||||
confidence,
|
confidence,
|
||||||
bridge_ips,
|
bridge_ips,
|
||||||
bridge_ips_today,
|
bridge_ips_today,
|
||||||
negative_reports,
|
negative_reports,
|
||||||
negative_reports_today,
|
negative_reports_today,
|
||||||
)
|
) {
|
||||||
};
|
return true;
|
||||||
let nr_test = if negative_reports_sd > 0.0 {
|
}
|
||||||
// We use CCDF because more negative reports is worse.
|
|
||||||
(1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
|
|
||||||
} else {
|
|
||||||
// Consider the bridge blocked if negative reports increase by
|
|
||||||
// more than 1 after a long static period. (Note that the
|
|
||||||
// mean is the exact value because we had no deviation.)
|
|
||||||
(negative_reports_today as f64) > negative_reports_mean + 1.0
|
|
||||||
};
|
};
|
||||||
|
|
||||||
positive_test || nr_test
|
// If none of the tests concluded that the bridge is blocked,
|
||||||
|
// return false
|
||||||
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue