Refactor Analyzer trait

This commit is contained in:
Vecna 2024-04-15 13:19:56 -04:00
parent b9abc83d36
commit 1a8b64080d
4 changed files with 350 additions and 271 deletions

346
src/analysis.rs Normal file
View File

@ -0,0 +1,346 @@
use crate::{get_date, BridgeInfo, BridgeInfoType};
use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
use nalgebra::DVector;
use statrs::distribution::{Continuous, MultivariateNormal};
use std::{
cmp::min,
collections::{BTreeMap, HashSet},
};
/// Provides the per-stage checks used to predict whether a bridge is blocked
/// in a single country.
///
/// `blocked_in` picks which stage to call based on the age of the bridge and
/// calls it once per country that is not already marked as blocked.
///
/// Arguments shared by all stages:
/// * `confidence` - threshold supplied by the caller of `blocked_in`; how it
///   is interpreted is up to the implementation.
/// * `bridge_ips` / `negative_reports` / `positive_reports` - one value per
///   day for the days leading up to today, oldest first; days without data
///   are 0.
/// * `*_today` - the corresponding value recorded for today (0 if none).
pub trait Analyzer {
/// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
fn stage_one(
&self,
confidence: f64,
bridge_ips: &[u32],
bridge_ips_today: u32,
negative_reports: &[u32],
negative_reports_today: u32,
) -> bool;
/// Evaluate invite-only bridge without positive reports. Return true if
/// blocked, false otherwise.
fn stage_two(
&self,
confidence: f64,
bridge_ips: &[u32],
bridge_ips_today: u32,
negative_reports: &[u32],
negative_reports_today: u32,
) -> bool;
/// Evaluate invite-only bridge with positive reports. Return true if
/// blocked, false otherwise.
fn stage_three(
&self,
confidence: f64,
bridge_ips: &[u32],
bridge_ips_today: u32,
negative_reports: &[u32],
negative_reports_today: u32,
positive_reports: &[u32],
positive_reports_today: u32,
) -> bool;
}
/// Accepts an analyzer, information about a bridge, and a confidence value.
/// Returns a set of country codes where the bridge is believed to be blocked.
///
/// Countries already marked as blocked in `bridge_info` are always returned
/// (bridges are assumed never to become unblocked). Every other country is
/// evaluated with the analyzer stage matching the age of the bridge:
///
/// * younger than `UNTRUSTED_INTERVAL` days: `stage_one`
/// * younger than `2 * UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] +
///   LEVEL_INTERVAL[2]` days: `stage_two`
/// * otherwise: `stage_three`
///
/// Days and statistics without a recorded value are treated as 0.
pub fn blocked_in(
    analyzer: &dyn Analyzer,
    bridge_info: &BridgeInfo,
    confidence: f64,
) -> HashSet<String> {
    // TODO: Re-evaluate past days if we have backdated reports
    let mut blocked_in = HashSet::<String>::new();
    let today = get_date();

    // Age of the bridge in days. `first_seen` is in the past, so it has to be
    // subtracted from `today`, not the other way round (which underflows).
    // Saturate so a `first_seen` in the future yields an age of 0 instead of
    // panicking or wrapping.
    let age = today.saturating_sub(bridge_info.first_seen);

    // Length of the historical window handed to the analyzer
    let num_days = min(age, UNTRUSTED_INTERVAL);

    // Stand-in for days on which nothing was recorded
    let no_data = BTreeMap::<BridgeInfoType, u32>::new();

    for (country, info) in &bridge_info.info_by_country {
        if info.blocked {
            // Assume bridges never become unblocked
            blocked_in.insert(country.to_string());
            continue;
        }

        // Get today's values
        // TODO: Evaluate on yesterday if we don't have data for today?
        let today_info = info.info_by_day.get(&today).unwrap_or(&no_data);
        let bridge_ips_today = today_info
            .get(&BridgeInfoType::BridgeIps)
            .copied()
            .unwrap_or(0);
        let negative_reports_today = today_info
            .get(&BridgeInfoType::NegativeReports)
            .copied()
            .unwrap_or(0);
        let positive_reports_today = today_info
            .get(&BridgeInfoType::PositiveReports)
            .copied()
            .unwrap_or(0);

        // Get time series for last num_days
        let mut bridge_ips = Vec::with_capacity(num_days as usize);
        let mut negative_reports = Vec::with_capacity(num_days as usize);
        let mut positive_reports = Vec::with_capacity(num_days as usize);
        for i in 0..num_days {
            // NOTE(review): this window covers today - num_days - 1 through
            // today - 2, i.e. it skips yesterday. Confirm whether that offset
            // is intended.
            let date = today - num_days + i - 1;
            let day_info = info.info_by_day.get(&date).unwrap_or(&no_data);
            bridge_ips.push(
                day_info
                    .get(&BridgeInfoType::BridgeIps)
                    .copied()
                    .unwrap_or(0),
            );
            negative_reports.push(
                day_info
                    .get(&BridgeInfoType::NegativeReports)
                    .copied()
                    .unwrap_or(0),
            );
            positive_reports.push(
                day_info
                    .get(&BridgeInfoType::PositiveReports)
                    .copied()
                    .unwrap_or(0),
            );
        }

        // Evaluate using appropriate stage based on age of the bridge
        let blocked = if age < UNTRUSTED_INTERVAL {
            // open-entry bridge
            analyzer.stage_one(
                confidence,
                &bridge_ips,
                bridge_ips_today,
                &negative_reports,
                negative_reports_today,
            )
        } else if age
            < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL
        {
            // invite-only bridge without 30+ days of historical data on
            // positive reports
            analyzer.stage_two(
                confidence,
                &bridge_ips,
                bridge_ips_today,
                &negative_reports,
                negative_reports_today,
            )
        } else {
            // invite-only bridge that has been up long enough that it
            // might have 30+ days of historical data on positive reports
            analyzer.stage_three(
                confidence,
                &bridge_ips,
                bridge_ips_today,
                &negative_reports,
                negative_reports_today,
                &positive_reports,
                positive_reports_today,
            )
        };

        if blocked {
            blocked_in.insert(country.to_string());
        }
    }
    blocked_in
}
// Analyzer implementations
/// Dummy example that never thinks bridges are blocked
///
/// Every stage ignores all of its arguments and returns `false`, so this
/// analyzer never adds a country on its own. (`blocked_in` still returns
/// countries that are already marked as blocked.)
pub struct ExampleAnalyzer {}
impl Analyzer for ExampleAnalyzer {
/// Open-entry stage: always returns `false`.
fn stage_one(
&self,
_confidence: f64,
_bridge_ips: &[u32],
_bridge_ips_today: u32,
_negative_reports: &[u32],
_negative_reports_today: u32,
) -> bool {
false
}
/// Invite-only stage without positive reports: always returns `false`.
fn stage_two(
&self,
_confidence: f64,
_bridge_ips: &[u32],
_bridge_ips_today: u32,
_negative_reports: &[u32],
_negative_reports_today: u32,
) -> bool {
false
}
/// Invite-only stage with positive reports: always returns `false`.
fn stage_three(
&self,
_confidence: f64,
_bridge_ips: &[u32],
_bridge_ips_today: u32,
_negative_reports: &[u32],
_negative_reports_today: u32,
_positive_reports: &[u32],
_positive_reports_today: u32,
) -> bool {
false
}
}
/// Model data as multivariate normal distribution
pub struct NormalAnalyzer {
    /// Number of negative reports in one day above which an open-entry bridge
    /// is considered blocked outright.
    max_threshold: u32,
    /// Factor applied to the day's bridge-IP count to get the tolerated
    /// number of negative reports for an open-entry bridge.
    scaling_factor: f64,
}

impl NormalAnalyzer {
    /// Creates an analyzer with the given stage-one parameters.
    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
        Self {
            max_threshold,
            scaling_factor,
        }
    }

    /// Computes the mean of every series in `data` and the covariance of
    /// every ordered pair of series.
    ///
    /// Returns `(means, covariances)`. `covariances` is the `n x n` matrix
    /// (with `n = data.len()`) flattened row by row. Sums are divided by the
    /// series length (not length - 1).
    ///
    /// # Panics
    ///
    /// Panics if two series differ in length.
    fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
        // One mean per series, in input order
        let means: Vec<f64> = data
            .iter()
            .map(|series| {
                let total = series
                    .iter()
                    .fold(0.0_f64, |acc, &count| acc + count as f64);
                total / series.len() as f64
            })
            .collect();

        // We don't need to recompute Syx, but we currently do
        let mut covariances = Vec::<f64>::with_capacity(data.len() * data.len());
        for (row, &row_mean) in data.iter().zip(&means) {
            for (col, &col_mean) in data.iter().zip(&means) {
                assert_eq!(row.len(), col.len());
                let total = row.iter().zip(col.iter()).fold(0.0_f64, |acc, (&x, &y)| {
                    acc + (x as f64 - row_mean) * (y as f64 - col_mean)
                });
                covariances.push(total / row.len() as f64);
            }
        }

        (means, covariances)
    }
}
impl Analyzer for NormalAnalyzer {
    /// Evaluate open-entry bridge based on only today's data
    ///
    /// Reports a blockage when today's negative reports exceed
    /// `max_threshold`, or exceed `scaling_factor` times today's bridge-IP
    /// count. The historical series and `confidence` are not used.
    fn stage_one(
        &self,
        _confidence: f64,
        _bridge_ips: &[u32],
        bridge_ips_today: u32,
        _negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool {
        let over_hard_limit = negative_reports_today > self.max_threshold;
        let over_scaled_limit =
            f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today);
        over_hard_limit || over_scaled_limit
    }

    /// Evaluate invite-only bridge based on last 30 days
    ///
    /// Fits a multivariate normal distribution to the bridge-IP and
    /// negative-report series. Today is flagged when its density under that
    /// distribution is below `confidence` and either negative reports are
    /// above their mean or bridge IPs are below theirs.
    ///
    /// # Panics
    ///
    /// Panics if `bridge_ips` holds fewer than `UNTRUSTED_INTERVAL` entries,
    /// if the two series differ in length, or if the distribution cannot be
    /// constructed from the computed mean and covariance.
    fn stage_two(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool {
        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
        assert_eq!(bridge_ips.len(), negative_reports.len());

        let (mean_vec, cov_mat) =
            Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
        // Copy the means out before the vector is moved into the distribution
        let (bridge_ips_mean, negative_reports_mean) = (mean_vec[0], mean_vec[1]);

        let distribution = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
        let observed_ips = f64::from(bridge_ips_today);
        let observed_negative = f64::from(negative_reports_today);
        let density = distribution.pdf(&DVector::from_vec(vec![observed_ips, observed_negative]));

        // Only unlikely observations that deviate in the "bad" direction count
        density < confidence
            && (observed_negative > negative_reports_mean || observed_ips < bridge_ips_mean)
    }

    /// Evaluate invite-only bridge with lv3+ users submitting positive reports
    ///
    /// Same approach as `stage_two`, with the positive-report series as a
    /// third variable: today is also flagged when positive reports fall below
    /// their mean.
    ///
    /// # Panics
    ///
    /// Panics if `bridge_ips` holds fewer than `UNTRUSTED_INTERVAL` entries,
    /// if the series differ in length, or if the distribution cannot be
    /// constructed from the computed mean and covariance.
    fn stage_three(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
        positive_reports: &[u32],
        positive_reports_today: u32,
    ) -> bool {
        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
        assert_eq!(bridge_ips.len(), negative_reports.len());
        assert_eq!(bridge_ips.len(), positive_reports.len());

        let series = [bridge_ips, negative_reports, positive_reports];
        let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&series);
        // Copy the means out before the vector is moved into the distribution
        let (bridge_ips_mean, negative_reports_mean, positive_reports_mean) =
            (mean_vec[0], mean_vec[1], mean_vec[2]);

        let distribution = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
        let observed_ips = f64::from(bridge_ips_today);
        let observed_negative = f64::from(negative_reports_today);
        let observed_positive = f64::from(positive_reports_today);
        let density = distribution.pdf(&DVector::from_vec(vec![
            observed_ips,
            observed_negative,
            observed_positive,
        ]));

        // Only unlikely observations that deviate in the "bad" direction count
        density < confidence
            && (observed_negative > negative_reports_mean
                || observed_ips < bridge_ips_mean
                || observed_positive < positive_reports_mean)
    }
}

View File

@ -1,267 +0,0 @@
use crate::{get_date, BridgeInfo, BridgeInfoType};
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
use nalgebra::DVector;
use statrs::distribution::{Continuous, MultivariateNormal};
use std::collections::{BTreeMap, HashSet};
/// Provides a function for predicting which countries block this bridge
pub trait Analyzer {
/// Returns the set of countries in which the bridge described by
/// `bridge_info` is believed to be blocked. `confidence` is a threshold
/// whose interpretation is left to the implementation.
fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String>;
}
/// Dummy example that just tells us about blockages we already know about
pub struct ExampleAnalyzer {}

impl Analyzer for ExampleAnalyzer {
    /// Returns exactly the countries whose entry in `bridge_info` is already
    /// flagged as blocked; `_confidence` is ignored.
    fn blocked_in(&self, bridge_info: &BridgeInfo, _confidence: f64) -> HashSet<String> {
        (&bridge_info.info_by_country)
            .into_iter()
            .filter(|(_, info)| info.blocked)
            .map(|(country, _)| country.to_string())
            .collect()
    }
}
/// Model data as multivariate normal distribution
pub struct NormalAnalyzer {
    /// Number of negative reports in one day above which an open-entry bridge
    /// is considered blocked outright.
    max_threshold: u32,
    /// Factor applied to the day's bridge-IP count to get the tolerated
    /// number of negative reports for an open-entry bridge.
    scaling_factor: f64,
}

impl NormalAnalyzer {
    /// Creates an analyzer with the given stage-one parameters.
    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
        Self {
            max_threshold,
            scaling_factor,
        }
    }

    /// Computes the mean of every series in `data` and the covariance of
    /// every ordered pair of series.
    ///
    /// Returns `(means, covariances)`. `covariances` is the `n x n` matrix
    /// (with `n = data.len()`) flattened row by row. Sums are divided by the
    /// series length (not length - 1).
    ///
    /// # Panics
    ///
    /// Panics if two series differ in length.
    fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
        // One mean per series, in input order
        let means: Vec<f64> = data
            .iter()
            .map(|series| {
                let total = series
                    .iter()
                    .fold(0.0_f64, |acc, &count| acc + count as f64);
                total / series.len() as f64
            })
            .collect();

        // We don't need to recompute Syx, but we currently do
        let mut covariances = Vec::<f64>::with_capacity(data.len() * data.len());
        for (row, &row_mean) in data.iter().zip(&means) {
            for (col, &col_mean) in data.iter().zip(&means) {
                assert_eq!(row.len(), col.len());
                let total = row.iter().zip(col.iter()).fold(0.0_f64, |acc, (&x, &y)| {
                    acc + (x as f64 - row_mean) * (y as f64 - col_mean)
                });
                covariances.push(total / row.len() as f64);
            }
        }

        (means, covariances)
    }

    /// Evaluate open-entry bridge based on only today's data
    ///
    /// Reports a blockage when today's negative reports exceed
    /// `max_threshold`, or exceed `scaling_factor` times today's bridge-IP
    /// count.
    fn stage_one(&self, bridge_ips_today: u32, negative_reports_today: u32) -> bool {
        let over_hard_limit = negative_reports_today > self.max_threshold;
        let over_scaled_limit =
            f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today);
        over_hard_limit || over_scaled_limit
    }

    /// Evaluate invite-only bridge based on last 30 days
    ///
    /// Fits a multivariate normal distribution to the bridge-IP and
    /// negative-report series. Today is flagged when its density under that
    /// distribution is below `confidence` and either negative reports are
    /// above their mean or bridge IPs are below theirs.
    ///
    /// # Panics
    ///
    /// Panics if `bridge_ips` holds fewer than `UNTRUSTED_INTERVAL` entries,
    /// if the two series differ in length, or if the distribution cannot be
    /// constructed from the computed mean and covariance.
    fn stage_two(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool {
        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
        assert_eq!(bridge_ips.len(), negative_reports.len());

        let (mean_vec, cov_mat) =
            Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
        // Copy the means out before the vector is moved into the distribution
        let (bridge_ips_mean, negative_reports_mean) = (mean_vec[0], mean_vec[1]);

        let distribution = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
        let observed_ips = f64::from(bridge_ips_today);
        let observed_negative = f64::from(negative_reports_today);
        let density = distribution.pdf(&DVector::from_vec(vec![observed_ips, observed_negative]));

        // Only unlikely observations that deviate in the "bad" direction count
        density < confidence
            && (observed_negative > negative_reports_mean || observed_ips < bridge_ips_mean)
    }

    /// Evaluate invite-only bridge with lv3+ users submitting positive reports
    ///
    /// Same approach as `stage_two`, with the positive-report series as a
    /// third variable: today is also flagged when positive reports fall below
    /// their mean.
    ///
    /// # Panics
    ///
    /// Panics if `bridge_ips` holds fewer than `UNTRUSTED_INTERVAL` entries,
    /// if the series differ in length, or if the distribution cannot be
    /// constructed from the computed mean and covariance.
    fn stage_three(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
        positive_reports: &[u32],
        positive_reports_today: u32,
    ) -> bool {
        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
        assert_eq!(bridge_ips.len(), negative_reports.len());
        assert_eq!(bridge_ips.len(), positive_reports.len());

        let series = [bridge_ips, negative_reports, positive_reports];
        let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&series);
        // Copy the means out before the vector is moved into the distribution
        let (bridge_ips_mean, negative_reports_mean, positive_reports_mean) =
            (mean_vec[0], mean_vec[1], mean_vec[2]);

        let distribution = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
        let observed_ips = f64::from(bridge_ips_today);
        let observed_negative = f64::from(negative_reports_today);
        let observed_positive = f64::from(positive_reports_today);
        let density = distribution.pdf(&DVector::from_vec(vec![
            observed_ips,
            observed_negative,
            observed_positive,
        ]));

        // Only unlikely observations that deviate in the "bad" direction count
        density < confidence
            && (observed_negative > negative_reports_mean
                || observed_ips < bridge_ips_mean
                || observed_positive < positive_reports_mean)
    }
}
impl Analyzer for NormalAnalyzer {
    /// Returns the set of countries in which the bridge is believed to be
    /// blocked.
    ///
    /// Countries already marked as blocked are always returned. For the
    /// others, a bridge younger than `UNTRUSTED_INTERVAL` days is judged by
    /// `stage_one` on today's data alone. An older bridge is judged on the
    /// preceding `UNTRUSTED_INTERVAL` days of history: by `stage_three` if any
    /// of those days has a positive-report entry, otherwise by `stage_two`.
    /// Days and statistics without a recorded value are treated as 0.
    fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String> {
        // TODO: Re-evaluate past days if we have backdated reports
        let mut blocked_in = HashSet::<String>::new();
        let today = get_date();

        // Age of the bridge in days. `first_seen` is in the past, so it has
        // to be subtracted from `today`, not the other way round (which
        // underflows). Saturate so a `first_seen` in the future yields an age
        // of 0 instead of panicking or wrapping.
        let age = today.saturating_sub(bridge_info.first_seen);

        // Stand-in for days on which nothing was recorded
        let no_data = BTreeMap::<BridgeInfoType, u32>::new();

        for (country, info) in &bridge_info.info_by_country {
            if info.blocked {
                // Assume bridges never become unblocked
                blocked_in.insert(country.to_string());
                continue;
            }

            // Get today's values
            // TODO: Evaluate on yesterday if we don't have data for today?
            let today_info = info.info_by_day.get(&today).unwrap_or(&no_data);
            let bridge_ips_today = today_info
                .get(&BridgeInfoType::BridgeIps)
                .copied()
                .unwrap_or(0);
            let negative_reports_today = today_info
                .get(&BridgeInfoType::NegativeReports)
                .copied()
                .unwrap_or(0);
            let positive_reports_today = today_info
                .get(&BridgeInfoType::PositiveReports)
                .copied()
                .unwrap_or(0);

            let blocked = if age < UNTRUSTED_INTERVAL {
                // open-entry bridge
                self.stage_one(bridge_ips_today, negative_reports_today)
            } else {
                // invite-only bridge
                let mut bridge_ips = [0; UNTRUSTED_INTERVAL as usize];
                let mut negative_reports = [0; UNTRUSTED_INTERVAL as usize];
                let mut positive_reports = [0; UNTRUSTED_INTERVAL as usize];
                let mut stage_3 = false;

                // Populate time series
                for i in 0..UNTRUSTED_INTERVAL {
                    // NOTE(review): this window covers
                    // today - UNTRUSTED_INTERVAL - 1 through today - 2, i.e.
                    // it skips yesterday. Confirm whether that is intended.
                    let date = today - UNTRUSTED_INTERVAL + i - 1;
                    let slot = i as usize;
                    let day_info = info.info_by_day.get(&date).unwrap_or(&no_data);
                    bridge_ips[slot] = day_info
                        .get(&BridgeInfoType::BridgeIps)
                        .copied()
                        .unwrap_or(0);
                    negative_reports[slot] = day_info
                        .get(&BridgeInfoType::NegativeReports)
                        .copied()
                        .unwrap_or(0);
                    // Any positive-report entry (even a 0) moves us to stage
                    // three; the slot stays 0 when there is none.
                    if let Some(&count) = day_info.get(&BridgeInfoType::PositiveReports) {
                        stage_3 = true;
                        positive_reports[slot] = count;
                    }
                }

                if stage_3 {
                    // We've seen positive reports
                    self.stage_three(
                        confidence,
                        &bridge_ips,
                        bridge_ips_today,
                        &negative_reports,
                        negative_reports_today,
                        &positive_reports,
                        positive_reports_today,
                    )
                } else {
                    // We have not seen positive reports
                    self.stage_two(
                        confidence,
                        &bridge_ips,
                        bridge_ips_today,
                        &negative_reports,
                        negative_reports_today,
                    )
                }
            };

            if blocked {
                blocked_in.insert(country.to_string());
            }
        }
        blocked_in
    }
}

View File

@ -86,7 +86,7 @@ async fn update_daily_info(
update_positive_reports(&db, &distributors).await;
let new_blockages = guess_blockages(
&db,
&analyzer::NormalAnalyzer::new(max_threshold, scaling_factor),
&analysis::NormalAnalyzer::new(max_threshold, scaling_factor),
confidence,
);
report_blockages(&distributors, new_blockages).await;

View File

@ -11,14 +11,14 @@ use std::{
fmt,
};
pub mod analyzer;
pub mod analysis;
pub mod bridge_verification_info;
pub mod extra_info;
pub mod negative_report;
pub mod positive_report;
pub mod request_handler;
use analyzer::Analyzer;
use analysis::Analyzer;
use extra_info::*;
use negative_report::*;
use positive_report::*;
@ -583,7 +583,7 @@ pub fn guess_blockages(
let mut bridge_info: BridgeInfo =
bincode::deserialize(&db.get(fingerprint).unwrap().unwrap()).unwrap();
let mut new_blockages = HashSet::<String>::new();
let blocked_in = analyzer.blocked_in(&bridge_info, confidence);
let blocked_in = analysis::blocked_in(analyzer, &bridge_info, confidence);
for country in blocked_in {
let bridge_country_info = bridge_info.info_by_country.get_mut(&country).unwrap();
if !bridge_country_info.blocked {