use crate::{BridgeInfo, BridgeInfoType}; use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL}; use nalgebra::DVector; use statrs::distribution::{Continuous, MultivariateNormal}; use std::{ cmp::min, collections::{BTreeMap, HashSet}, }; const SCALE_BRIDGE_IPS: u32 = 8; /// Provides a function for predicting which countries block this bridge pub trait Analyzer { /// Evaluate open-entry bridge. Returns true if blocked, false otherwise. fn stage_one( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge without positive reports. Return true if /// blocked, false otherwise. fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge with positive reports. Return true if /// blocked, false otherwise. fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool; } /// Accepts an analyzer, information about a bridge, and a confidence value. /// Returns a set of country codes where the bridge is believed to be blocked. pub fn blocked_in( analyzer: &dyn Analyzer, bridge_info: &BridgeInfo, confidence: f64, date: u32, ) -> HashSet { // TODO: Re-evaluate past days if we have backdated reports let mut blocked_in = HashSet::::new(); let today = date; let age = today - bridge_info.first_seen; for (country, info) in &bridge_info.info_by_country { if info.blocked { // Assume bridges never become unblocked blocked_in.insert(country.to_string()); } else { // Get today's values let new_map_binding = BTreeMap::::new(); // TODO: Evaluate on yesterday if we don't have data for today? let today_info = match info.info_by_day.get(&today) { Some(v) => v, None => &new_map_binding, }; let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v / SCALE_BRIDGE_IPS, None => 0, }; let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; let num_days = min(age, UNTRUSTED_INTERVAL); // Get time series for last num_days let mut bridge_ips = vec![0; num_days as usize]; let mut negative_reports = vec![0; num_days as usize]; let mut positive_reports = vec![0; num_days as usize]; for i in 0..num_days { let date = today - num_days + i - 1; let new_map_binding = BTreeMap::::new(); let day_info = match info.info_by_day.get(&date) { Some(v) => v, None => &new_map_binding, }; bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v / SCALE_BRIDGE_IPS, None => 0, }; negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; } // Evaluate using appropriate stage based on age of the bridge if age < UNTRUSTED_INTERVAL { // open-entry bridge if analyzer.stage_one( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else if age < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL { // invite-only bridge without 30+ days of historical data on // positive reports if analyzer.stage_two( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else { // invite-only bridge that has been up long enough that it // might have 30+ days of historical data on positive reports if analyzer.stage_three( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, &positive_reports, positive_reports_today, ) { blocked_in.insert(country.to_string()); } } } } blocked_in } // Analyzer implementations /// Dummy example that never thinks bridges are blocked pub struct ExampleAnalyzer {} impl Analyzer for ExampleAnalyzer { fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_two( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_three( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, _positive_reports: &[u32], _positive_reports_today: u32, ) -> bool { false } } /// Model data as multivariate normal distribution pub struct NormalAnalyzer { max_threshold: u32, scaling_factor: f64, } impl NormalAnalyzer { pub fn new(max_threshold: u32, scaling_factor: f64) -> Self { Self { max_threshold, scaling_factor, } } // Returns the mean vector, vector of individual standard deviations, and // covariance matrix fn stats(data: &[&[u32]]) -> (Vec, Vec, Vec) { let n = data.len(); // Compute mean and standard deviation vectors let (mean_vec, sd_vec) = { let mut mean_vec = Vec::::new(); let mut sd_vec = Vec::::new(); for var in data { // Compute mean let mut sum = 0.0; for count in *var { sum += *count as f64; } let mean = sum / var.len() as f64; // Compute standard deviation let mut sum = 0.0; for count in *var { sum += (*count as f64 - mean).powi(2); } let sd = (sum / var.len() as f64).sqrt(); mean_vec.push(mean); sd_vec.push(sd); } (mean_vec, sd_vec) }; // Compute covariance matrix let cov_mat = { let mut cov_mat = Vec::::new(); // We don't need to recompute Syx, but we currently do for i in 0..n { for j in 0..n { cov_mat.push({ let var1 = data[i]; let var1_mean = mean_vec[i]; let var2 = data[j]; let var2_mean = mean_vec[j]; assert_eq!(var1.len(), var2.len()); let mut sum = 0.0; for index in 0..var1.len() { sum += (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean); } sum / var1.len() as f64 }); } } cov_mat }; (mean_vec, sd_vec, cov_mat) } } impl Analyzer for NormalAnalyzer { /// Evaluate open-entry bridge based on only today's data fn stage_one( &self, _confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, _negative_reports: &[u32], negative_reports_today: u32, ) -> bool { negative_reports_today > self.max_threshold || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today) * SCALE_BRIDGE_IPS as f64 } /// Evaluate invite-only bridge based on last 30 days fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); let alpha = 1.0 - confidence; let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]); let bridge_ips_mean = mean_vec[0]; let negative_reports_mean = mean_vec[1]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); println!( "evaluate mvn.pdf of [{},{}]", bridge_ips_today as f64, negative_reports_today as f64 ); println!( "{}", mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64 ])) ); if mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, ])) < alpha { (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd } else { false } } /// Evaluate invite-only bridge with lv3+ users submitting positive reports fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); assert_eq!(bridge_ips.len(), positive_reports.len()); let alpha = 1.0 - confidence; let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports, positive_reports]); let bridge_ips_mean = mean_vec[0]; let negative_reports_mean = mean_vec[1]; let positive_reports_mean = mean_vec[2]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; let positive_reports_sd = sd_vec[2]; let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); if mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, positive_reports_today as f64, ])) < alpha { (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd || (positive_reports_today as f64) < positive_reports_mean - positive_reports_sd } else { false } } }