use crate::{BridgeInfo, BridgeInfoType}; use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL}; use nalgebra::DVector; use statrs::distribution::{Continuous, MultivariateNormal, Normal}; use std::{ cmp::min, collections::{BTreeMap, HashSet}, }; /// Provides a function for predicting which countries block this bridge pub trait Analyzer { /// Evaluate open-entry bridge. Returns true if blocked, false otherwise. fn stage_one( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge without positive reports. Return true if /// blocked, false otherwise. fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge with positive reports. Return true if /// blocked, false otherwise. fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool; } /// Accepts an analyzer, information about a bridge, and a confidence value. /// Returns a set of country codes where the bridge is believed to be blocked. pub fn blocked_in( analyzer: &dyn Analyzer, bridge_info: &BridgeInfo, confidence: f64, date: u32, ) -> HashSet { // TODO: Re-evaluate past days if we have backdated reports let mut blocked_in = HashSet::::new(); let today = date; let age = today - bridge_info.first_seen; for (country, info) in &bridge_info.info_by_country { if info.blocked { // Assume bridges never become unblocked blocked_in.insert(country.to_string()); } else { // Get today's values let new_map_binding = BTreeMap::::new(); // TODO: Evaluate on yesterday if we don't have data for today? let today_info = match info.info_by_day.get(&today) { Some(v) => v, None => &new_map_binding, }; let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v, None => 0, }; let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; let num_days = min(age, UNTRUSTED_INTERVAL); // Get time series for last num_days let mut bridge_ips = vec![0; num_days as usize]; let mut negative_reports = vec![0; num_days as usize]; let mut positive_reports = vec![0; num_days as usize]; for i in 0..num_days { let date = today - num_days + i - 1; let new_map_binding = BTreeMap::::new(); let day_info = match info.info_by_day.get(&date) { Some(v) => v, None => &new_map_binding, }; bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v, None => 0, }; negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; } // Evaluate using appropriate stage based on age of the bridge if age < UNTRUSTED_INTERVAL { // open-entry bridge if analyzer.stage_one( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else if age < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL { // invite-only bridge without 30+ days of historical data on // positive reports if analyzer.stage_two( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else { // invite-only bridge that has been up long enough that it // might have 30+ days of historical data on positive reports if analyzer.stage_three( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, &positive_reports, positive_reports_today, ) { blocked_in.insert(country.to_string()); } } } } blocked_in } // Analyzer implementations /// Dummy example that never thinks bridges are blocked pub struct ExampleAnalyzer {} impl Analyzer for ExampleAnalyzer { fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_two( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_three( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, _positive_reports: &[u32], _positive_reports_today: u32, ) -> bool { false } } /// Model data as multivariate normal distribution pub struct NormalAnalyzer { max_threshold: u32, scaling_factor: f64, } impl NormalAnalyzer { pub fn new(max_threshold: u32, scaling_factor: f64) -> Self { Self { max_threshold, scaling_factor, } } // Returns the mean vector, vector of individual standard deviations, and // covariance matrix fn stats(data: &[&[u32]]) -> (Vec, Vec, Vec) { let n = data.len(); // Compute mean and standard deviation vectors let (mean_vec, sd_vec) = { let mut mean_vec = Vec::::new(); let mut sd_vec = Vec::::new(); for var in data { // Compute mean let mut sum = 0.0; for count in *var { sum += *count as f64; } let mean = sum / var.len() as f64; // Compute standard deviation let mut sum = 0.0; for count in *var { sum += (*count as f64 - mean).powi(2); } let sd = (sum / var.len() as f64).sqrt(); mean_vec.push(mean); sd_vec.push(sd); } (mean_vec, sd_vec) }; // Compute covariance matrix let cov_mat = { let mut cov_mat = Vec::::new(); // We don't need to recompute Syx, but we currently do for i in 0..n { for j in 0..n { cov_mat.push({ let var1 = data[i]; let var1_mean = mean_vec[i]; let var2 = data[j]; let var2_mean = mean_vec[j]; assert_eq!(var1.len(), var2.len()); let mut sum = 0.0; for index in 0..var1.len() { sum += (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean); } sum / (var1.len() - 1) as f64 }); } } cov_mat }; (mean_vec, sd_vec, cov_mat) } } impl Analyzer for NormalAnalyzer { /// Evaluate open-entry bridge based on only today's data fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], bridge_ips_today: u32, _negative_reports: &[u32], negative_reports_today: u32, ) -> bool { negative_reports_today > self.max_threshold || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today) } /// Evaluate invite-only bridge based on last 30 days fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); let alpha = 1.0 - confidence; let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]); let negative_reports_mean = mean_vec[1]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; // Artificially create data for alternative hypothesis let num_days = bridge_ips.len() as usize; let mut bridge_ips_blocked = vec![0; num_days]; let mut negative_reports_blocked = vec![0; num_days]; let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32; for i in 0..num_days { // Suppose bridge stats will go down by 2 SDs bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] { 0 } else { bridge_ips[i] - bridge_ips_deviation }; // Suppose negative reports will go up by 2 SDs negative_reports_blocked[i] = negative_reports[i] + (2.0 * negative_reports_sd).round() as u32; } let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]); let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); let pdf = mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, ])); let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap(); let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, ])); // Also model negative reports in isolation let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap(); let nr_pdf = nr_normal.pdf(negative_reports_today as f64); let nr_normal_blocked = Normal::new( negative_reports_mean + 2.0 * negative_reports_sd, negative_reports_sd, ) .unwrap(); let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64); (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha } /// Evaluate invite-only bridge with lv3+ users submitting positive reports fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); assert_eq!(bridge_ips.len(), positive_reports.len()); let alpha = 1.0 - confidence; let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports, positive_reports]); let negative_reports_mean = mean_vec[1]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; let positive_reports_sd = sd_vec[2]; // Artificially create data for alternative hypothesis let num_days = bridge_ips.len() as usize; let mut bridge_ips_blocked = vec![0; num_days]; let mut negative_reports_blocked = vec![0; num_days]; let mut positive_reports_blocked = vec![0; num_days]; let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32; let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32; for i in 0..num_days { // Suppose positive reports will go down by 2 SDs positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] { 0 } else { positive_reports[i] - positive_reports_deviation }; // Suppose bridge stats will go down by 2 SDs bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] { 0 } else { bridge_ips[i] - bridge_ips_deviation }; // Suppose each user who would have submitted a positive report but // didn't submits a negative report instead. negative_reports_blocked[i] = negative_reports[i] + positive_reports[i] - positive_reports_blocked[i]; } let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[ &bridge_ips_blocked, &negative_reports_blocked, &positive_reports_blocked, ]); let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); let pdf = mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, positive_reports_today as f64, ])); let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap(); let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, positive_reports_today as f64, ])); // Also model negative reports in isolation let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap(); let nr_pdf = nr_normal.pdf(negative_reports_today as f64); // Note we do NOT make this a function of positive signals let nr_normal_blocked = Normal::new( negative_reports_mean + 2.0 * negative_reports_sd, negative_reports_sd, ) .unwrap(); let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64); (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha } }