diff --git a/src/analysis.rs b/src/analysis.rs
new file mode 100644
index 0000000..6f0c3a6
--- /dev/null
+++ b/src/analysis.rs
@@ -0,0 +1,374 @@
+use crate::{get_date, BridgeInfo, BridgeInfoType};
+use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
+use nalgebra::DVector;
+use statrs::distribution::{Continuous, MultivariateNormal};
+use std::{
+    cmp::min,
+    collections::{BTreeMap, HashSet},
+};
+
+/// Provides functions for predicting whether a country blocks a bridge
+///
+/// Each stage receives the historical per-day series (`bridge_ips`,
+/// `negative_reports`, `positive_reports`) and the corresponding counts for
+/// the current day; `blocked_in` picks the stage from the bridge's age.
+pub trait Analyzer {
+    /// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
+    fn stage_one(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool;
+
+    /// Evaluate invite-only bridge without positive reports. Return true if
+    /// blocked, false otherwise.
+    fn stage_two(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool;
+
+    /// Evaluate invite-only bridge with positive reports. Return true if
+    /// blocked, false otherwise.
+    fn stage_three(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+        positive_reports: &[u32],
+        positive_reports_today: u32,
+    ) -> bool;
+}
+
+/// Accepts an analyzer, information about a bridge, and a confidence value.
+/// Returns a set of country codes where the bridge is believed to be blocked.
+///
+/// Countries already marked as blocked are always included. For the others,
+/// the bridge's age (days since `first_seen`) selects which `Analyzer` stage
+/// evaluates today's counts against the preceding history.
+pub fn blocked_in(
+    analyzer: &dyn Analyzer,
+    bridge_info: &BridgeInfo,
+    confidence: f64,
+) -> HashSet<String> {
+    // TODO: Re-evaluate past days if we have backdated reports
+    let mut blocked_in = HashSet::<String>::new();
+    let today = get_date();
+    // Age in days. `first_seen` is in the past, so it must be subtracted from
+    // today (the reverse underflows); saturate in case it is in the future.
+    let age = today.saturating_sub(bridge_info.first_seen);
+    for (country, info) in &bridge_info.info_by_country {
+        if info.blocked {
+            // Assume bridges never become unblocked
+            blocked_in.insert(country.to_string());
+        } else {
+            // Get today's values
+            let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
+            // TODO: Evaluate on yesterday if we don't have data for today?
+            let today_info = match info.info_by_day.get(&today) {
+                Some(v) => v,
+                None => &new_map_binding,
+            };
+            let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
+                Some(v) => *v,
+                None => 0,
+            };
+            let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) {
+                Some(v) => *v,
+                None => 0,
+            };
+            let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) {
+                Some(v) => *v,
+                None => 0,
+            };
+
+            let num_days = min(age, UNTRUSTED_INTERVAL);
+
+            // Get time series for last num_days
+            let mut bridge_ips = vec![0; num_days as usize];
+            let mut negative_reports = vec![0; num_days as usize];
+            let mut positive_reports = vec![0; num_days as usize];
+
+            for i in 0..num_days {
+                // NOTE(review): this window covers today-num_days-1 ..= today-2,
+                // so yesterday is never included. Confirm that is intended.
+                let date = today - num_days + i - 1;
+                let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
+                let day_info = match info.info_by_day.get(&date) {
+                    Some(v) => v,
+                    None => &new_map_binding,
+                };
+                bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
+                    Some(&v) => v,
+                    None => 0,
+                };
+                negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports)
+                {
+                    Some(&v) => v,
+                    None => 0,
+                };
+                positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports)
+                {
+                    Some(&v) => v,
+                    None => 0,
+                };
+            }
+
+            // Evaluate using appropriate stage based on age of the bridge
+            if age < UNTRUSTED_INTERVAL {
+                // open-entry bridge
+                if analyzer.stage_one(
+                    confidence,
+                    &bridge_ips,
+                    bridge_ips_today,
+                    &negative_reports,
+                    negative_reports_today,
+                ) {
+                    blocked_in.insert(country.to_string());
+                }
+            } else if age
+                < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL
+            {
+                // invite-only bridge without 30+ days of historical data on
+                // positive reports
+                if analyzer.stage_two(
+                    confidence,
+                    &bridge_ips,
+                    bridge_ips_today,
+                    &negative_reports,
+                    negative_reports_today,
+                ) {
+                    blocked_in.insert(country.to_string());
+                }
+            } else {
+                // invite-only bridge that has been up long enough that it
+                // might have 30+ days of historical data on positive reports
+                if analyzer.stage_three(
+                    confidence,
+                    &bridge_ips,
+                    bridge_ips_today,
+                    &negative_reports,
+                    negative_reports_today,
+                    &positive_reports,
+                    positive_reports_today,
+                ) {
+                    blocked_in.insert(country.to_string());
+                }
+            }
+        }
+    }
+    blocked_in
+}
+
+// Analyzer implementations
+
+/// Dummy example that never thinks bridges are blocked
+pub struct ExampleAnalyzer {}
+
+impl Analyzer for ExampleAnalyzer {
+    fn stage_one(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        _bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        _negative_reports_today: u32,
+    ) -> bool {
+        false
+    }
+
+    fn stage_two(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        _bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        _negative_reports_today: u32,
+    ) -> bool {
+        false
+    }
+
+    fn stage_three(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        _bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        _negative_reports_today: u32,
+        _positive_reports: &[u32],
+        _positive_reports_today: u32,
+    ) -> bool {
+        false
+    }
+}
+
+/// Model data as multivariate normal distribution
+pub struct NormalAnalyzer {
+    // Stage one: absolute cap on today's negative reports
+    max_threshold: u32,
+    // Stage one: allowed ratio of negative reports to bridge connections
+    scaling_factor: f64,
+}
+
+impl NormalAnalyzer {
+    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
+        Self {
+            max_threshold,
+            scaling_factor,
+        }
+    }
+
+    /// Returns the per-variable means and the row-major n x n covariance
+    /// matrix (population covariance, dividing by the sample count) of `data`,
+    /// where each element of `data` is one variable's series.
+    ///
+    /// Panics if the series differ in length. Empty series yield NaN entries.
+    fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
+        let n = data.len();
+
+        // Compute mean vector
+        let mean_vec = {
+            let mut mean_vec = Vec::<f64>::new();
+            for var in data {
+                mean_vec.push({
+                    let mut sum = 0.0;
+                    for count in *var {
+                        sum += *count as f64;
+                    }
+                    sum / var.len() as f64
+                });
+            }
+            mean_vec
+        };
+
+        // Compute covariance matrix
+        let cov_mat = {
+            let mut cov_mat = Vec::<f64>::new();
+            // We don't need to recompute Syx, but we currently do
+            for i in 0..n {
+                for j in 0..n {
+                    cov_mat.push({
+                        let var1 = data[i];
+                        let var1_mean = mean_vec[i];
+
+                        let var2 = data[j];
+                        let var2_mean = mean_vec[j];
+
+                        assert_eq!(var1.len(), var2.len());
+
+                        let mut sum = 0.0;
+                        for index in 0..var1.len() {
+                            sum +=
+                                (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
+                        }
+                        sum / var1.len() as f64
+                    });
+                }
+            }
+            cov_mat
+        };
+
+        (mean_vec, cov_mat)
+    }
+}
+
+impl Analyzer for NormalAnalyzer {
+    /// Evaluate open-entry bridge based on only today's data
+    fn stage_one(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool {
+        negative_reports_today > self.max_threshold
+            || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
+    }
+
+    /// Evaluate invite-only bridge based on last 30 days
+    ///
+    /// Panics if fewer than `UNTRUSTED_INTERVAL` days are supplied, if the
+    /// series differ in length, or if `MultivariateNormal::new` rejects the
+    /// covariance matrix.
+    // NOTE(review): a constant series (e.g. all zeros) gives a singular
+    // covariance matrix, which presumably makes `new` fail; confirm and handle.
+    fn stage_two(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool {
+        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
+        assert_eq!(bridge_ips.len(), negative_reports.len());
+
+        let (mean_vec, cov_mat) =
+            Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
+        let bridge_ips_mean = mean_vec[0];
+        let negative_reports_mean = mean_vec[1];
+
+        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
+        if mvn.pdf(&DVector::from_vec(vec![
+            bridge_ips_today as f64,
+            negative_reports_today as f64,
+        ])) < confidence
+        {
+            (negative_reports_today as f64) > negative_reports_mean
+                || (bridge_ips_today as f64) < bridge_ips_mean
+        } else {
+            false
+        }
+    }
+
+    /// Evaluate invite-only bridge with lv3+ users submitting positive reports
+    ///
+    /// Panics under the same conditions as `stage_two` (short or mismatched
+    /// series, or a covariance matrix rejected by `MultivariateNormal::new`).
+    fn stage_three(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+        positive_reports: &[u32],
+        positive_reports_today: u32,
+    ) -> bool {
+        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
+        assert_eq!(bridge_ips.len(), negative_reports.len());
+        assert_eq!(bridge_ips.len(), positive_reports.len());
+
+        let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
+            bridge_ips,
+            negative_reports,
+            positive_reports,
+        ]);
+        let bridge_ips_mean = mean_vec[0];
+        let negative_reports_mean = mean_vec[1];
+        let positive_reports_mean = mean_vec[2];
+
+        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
+        if mvn.pdf(&DVector::from_vec(vec![
+            bridge_ips_today as f64,
+            negative_reports_today as f64,
+            positive_reports_today as f64,
+        ])) < confidence
+        {
+            (negative_reports_today as f64) > negative_reports_mean
+                || (bridge_ips_today as f64) < bridge_ips_mean
+                || (positive_reports_today as f64) < positive_reports_mean
+        } else {
+            false
+        }
+    }
+}
diff --git a/src/analyzer.rs b/src/analyzer.rs
deleted file mode 100644
index 666383d..0000000
--- a/src/analyzer.rs
+++ /dev/null
@@ -1,267 +0,0 @@
-use crate::{get_date, BridgeInfo, BridgeInfoType};
-use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
-use nalgebra::DVector;
-use statrs::distribution::{Continuous, MultivariateNormal};
-use std::collections::{BTreeMap, HashSet};
-
-/// Provides a function for predicting which countries block this bridge
-pub trait Analyzer {
-    fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String>;
-}
-
-/// Dummy example that just tells us about blockages we already know about
-pub struct ExampleAnalyzer {}
-
-impl Analyzer for ExampleAnalyzer {
-    fn blocked_in(&self, bridge_info: &BridgeInfo, _confidence: f64) -> HashSet<String> {
-        let mut blocked_in = HashSet::<String>::new();
-        for (country, info) in &bridge_info.info_by_country {
-            if info.blocked {
-                blocked_in.insert(country.to_string());
-            }
-        }
-        blocked_in
-    }
-}
-
-/// Model data as multivariate normal distribution
-pub struct NormalAnalyzer {
-    max_threshold: u32,
-    scaling_factor: f64,
-}
-
-impl NormalAnalyzer {
-    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
-        Self {
-            max_threshold,
-            scaling_factor,
-        }
-    }
-
-    fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
-        let n = data.len();
-
-        // Compute mean vector
-        let mean_vec = {
-            let mut mean_vec = Vec::<f64>::new();
-            for var in data {
-                mean_vec.push({
-                    let mut sum = 0.0;
-                    for count in *var {
-                        sum += *count as f64;
-                    }
-                    sum / var.len() as f64
-                });
-            }
-            mean_vec
-        };
-
-        // Compute covariance matrix
-        let cov_mat = {
-            let mut cov_mat = Vec::<f64>::new();
-            // We don't need to recompute Syx, but we currently do
-            for i in 0..n {
-                for j in 0..n {
-                    cov_mat.push({
-                        let var1 = data[i];
-                        let var1_mean = mean_vec[i];
-
-                        let var2 = data[j];
-                        let var2_mean = mean_vec[j];
-
-                        assert_eq!(var1.len(), var2.len());
-
-                        let mut sum = 0.0;
-                        for index in 0..var1.len() {
-                            sum +=
-                                (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
-                        }
-                        sum / var1.len() as f64
-                    });
-                }
-            }
-            cov_mat
-        };
-
-        (mean_vec, cov_mat)
-    }
-
-    /// Evaluate open-entry bridge based on only today's data
-    fn stage_one(&self, bridge_ips_today: u32, negative_reports_today: u32) -> bool {
-        negative_reports_today > self.max_threshold
-            || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
-    }
-
-    /// Evaluate invite-only bridge based on last 30 days
-    fn stage_two(
-        &self,
-        confidence: f64,
-        bridge_ips: &[u32],
-        bridge_ips_today: u32,
-        negative_reports: &[u32],
-        negative_reports_today: u32,
-    ) -> bool {
-        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
-        assert_eq!(bridge_ips.len(), negative_reports.len());
-
-        let (mean_vec, cov_mat) =
-            Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
-        let bridge_ips_mean = mean_vec[0];
-        let negative_reports_mean = mean_vec[1];
-
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        if mvn.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-        ])) < confidence
-        {
-            (negative_reports_today as f64) > negative_reports_mean
-                || (bridge_ips_today as f64) < bridge_ips_mean
-        } else {
-            false
-        }
-    }
-
-    /// Evaluate invite-only bridge with lv3+ users submitting positive reports
-    fn stage_three(
-        &self,
-        confidence: f64,
-        bridge_ips: &[u32],
-        bridge_ips_today: u32,
-        negative_reports: &[u32],
-        negative_reports_today: u32,
-        positive_reports: &[u32],
-        positive_reports_today: u32,
-    ) -> bool {
-        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
-        assert_eq!(bridge_ips.len(), negative_reports.len());
-        assert_eq!(bridge_ips.len(), positive_reports.len());
-
-        let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
-            bridge_ips,
-            negative_reports,
-            positive_reports,
-        ]);
-        let bridge_ips_mean = mean_vec[0];
-        let negative_reports_mean = mean_vec[1];
-        let positive_reports_mean = mean_vec[2];
-
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        if mvn.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-            positive_reports_today as f64,
-        ])) < confidence
-        {
-            (negative_reports_today as f64) > negative_reports_mean
-                || (bridge_ips_today as f64) < bridge_ips_mean
-                || (positive_reports_today as f64) < positive_reports_mean
-        } else {
-            false
-        }
-    }
-}
-
-impl Analyzer for NormalAnalyzer {
-    fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String> {
-        // TODO: Re-evaluate past days if we have backdated reports
-        let mut blocked_in = HashSet::<String>::new();
-        let today = get_date();
-        let age = bridge_info.first_seen - today;
-        for (country, info) in &bridge_info.info_by_country {
-            if info.blocked {
-                // Assume bridges never become unblocked
-                blocked_in.insert(country.to_string());
-            } else {
-                // Get today's values
-                let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
-                // TODO: Evaluate on yesterday if we don't have data for today?
-                let today_info = match info.info_by_day.get(&today) {
-                    Some(v) => v,
-                    None => &new_map_binding,
-                };
-                let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
-                    Some(v) => *v,
-                    None => 0,
-                };
-                let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports)
-                {
-                    Some(v) => *v,
-                    None => 0,
-                };
-                let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports)
-                {
-                    Some(v) => *v,
-                    None => 0,
-                };
-
-                if age < UNTRUSTED_INTERVAL {
-                    // open-entry bridge
-                    if self.stage_one(bridge_ips_today, negative_reports_today) {
-                        blocked_in.insert(country.to_string());
-                    }
-                } else {
-                    // invite-only bridge
-                    let mut bridge_ips = [0; UNTRUSTED_INTERVAL as usize];
-                    let mut negative_reports = [0; UNTRUSTED_INTERVAL as usize];
-                    let mut positive_reports = [0; UNTRUSTED_INTERVAL as usize];
-                    let mut stage_3 = false;
-
-                    // Populate time series
-                    for i in 0..UNTRUSTED_INTERVAL {
-                        let date = today - UNTRUSTED_INTERVAL + i - 1;
-                        let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
-                        let day_info = match info.info_by_day.get(&date) {
-                            Some(v) => v,
-                            None => &new_map_binding,
-                        };
-                        bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
-                            Some(v) => *v,
-                            None => 0,
-                        };
-                        negative_reports[i as usize] =
-                            match day_info.get(&BridgeInfoType::NegativeReports) {
-                                Some(v) => *v,
-                                None => 0,
-                            };
-                        positive_reports[i as usize] =
-                            match day_info.get(&BridgeInfoType::PositiveReports) {
-                                Some(v) => {
-                                    stage_3 = true;
-                                    *v
-                                }
-                                None => 0,
-                            };
-                    }
-
-                    if stage_3 {
-                        // We've seen positive reports
-                        if self.stage_three(
-                            confidence,
-                            &bridge_ips,
-                            bridge_ips_today,
-                            &negative_reports,
-                            negative_reports_today,
-                            &positive_reports,
-                            positive_reports_today,
-                        ) {
-                            blocked_in.insert(country.to_string());
-                        }
-                    } else {
-                        // We have not seen positive reports
-                        if self.stage_two(
-                            confidence,
-                            &bridge_ips,
-                            bridge_ips_today,
-                            &negative_reports,
-                            negative_reports_today,
-                        ) {
-                            blocked_in.insert(country.to_string());
-                        }
-                    }
-                }
-            }
-        }
-        blocked_in
-    }
-}
diff --git a/src/bin/server.rs b/src/bin/server.rs
index 1e8ac0e..0f8255f 100644
--- a/src/bin/server.rs
+++ b/src/bin/server.rs
@@ -86,7 +86,7 @@ async fn update_daily_info(
     update_positive_reports(&db, &distributors).await;
     let new_blockages = guess_blockages(
         &db,
-        &analyzer::NormalAnalyzer::new(max_threshold, scaling_factor),
+        &analysis::NormalAnalyzer::new(max_threshold, scaling_factor),
         confidence,
     );
     report_blockages(&distributors, new_blockages).await;
diff --git a/src/lib.rs b/src/lib.rs
index ccfa862..6394d41 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -11,14 +11,14 @@ use std::{
     fmt,
 };
 
-pub mod analyzer;
+pub mod analysis;
 pub mod bridge_verification_info;
 pub mod extra_info;
 pub mod negative_report;
 pub mod positive_report;
 pub mod request_handler;
 
-use analyzer::Analyzer;
+use analysis::Analyzer;
 use extra_info::*;
 use negative_report::*;
 use positive_report::*;
@@ -583,7 +583,7 @@ pub fn guess_blockages(
         let mut bridge_info: BridgeInfo =
             bincode::deserialize(&db.get(fingerprint).unwrap().unwrap()).unwrap();
         let mut new_blockages = HashSet::<String>::new();
-        let blocked_in = analyzer.blocked_in(&bridge_info, confidence);
+        let blocked_in = analysis::blocked_in(analyzer, &bridge_info, confidence);
         for country in blocked_in {
             let bridge_country_info = bridge_info.info_by_country.get_mut(&country).unwrap();
             if !bridge_country_info.blocked {