use crate::{BridgeInfo, BridgeInfoType}; use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL; use nalgebra::{Cholesky, DMatrix, DVector}; use rand::Rng; use statrs::distribution::{ContinuousCDF, MultivariateNormal, Normal}; use std::{ cmp::{max, min}, collections::{BTreeMap, HashSet}, }; /// Provides a function for predicting which countries block this bridge pub trait Analyzer { /// Evaluate open-entry bridge. Returns true if blocked, false otherwise. fn stage_one( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge without positive reports. Return true if /// blocked, false otherwise. fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge with positive reports. Return true if /// blocked, false otherwise. fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool; } /// Accepts an analyzer, information about a bridge, and a confidence value. /// Returns a set of country codes where the bridge is believed to be blocked. pub fn blocked_in( analyzer: &dyn Analyzer, bridge_info: &BridgeInfo, confidence: f64, date: u32, min_historical_days: u32, max_historical_days: u32, ) -> HashSet { let mut blocked_in = HashSet::::new(); let today = date; for (country, info) in &bridge_info.info_by_country { let age = today - info.first_seen; if info.blocked { // Assume bridges never become unblocked blocked_in.insert(country.to_string()); } else { // Get today's values let new_map_binding = BTreeMap::::new(); // TODO: Evaluate on yesterday if we don't have data for today? let today_info = match info.info_by_day.get(&today) { Some(v) => v, None => &new_map_binding, }; let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v, None => 0, }; let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; let num_days = min(age, max_historical_days); // Get time series for last num_days let mut bridge_ips = vec![0; num_days as usize]; let mut negative_reports = vec![0; num_days as usize]; let mut positive_reports = vec![0; num_days as usize]; for i in 0..num_days { let date = today - num_days + i - 1; let new_map_binding = BTreeMap::::new(); let day_info = match info.info_by_day.get(&date) { Some(v) => v, None => &new_map_binding, }; bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v, None => 0, }; negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; } // Evaluate using appropriate stage based on age of the bridge if age < UNTRUSTED_INTERVAL || age < min_historical_days { // open-entry bridge and/or not enough days of // historical days for stages 2 and 3 if analyzer.stage_one( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else if info.first_pr.is_none() || today < info.first_pr.unwrap() + min_historical_days { // invite-only bridge without min_historical_days of // historical data on positive reports if analyzer.stage_two( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else { // invite-only bridge that has min_historical_days or // more of historical data since the first positive report if analyzer.stage_three( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, &positive_reports, positive_reports_today, ) { blocked_in.insert(country.to_string()); } } } } blocked_in } // Analyzer implementations /// Dummy example that never thinks bridges are blocked pub struct ExampleAnalyzer {} impl Analyzer for ExampleAnalyzer { fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_two( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_three( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, _positive_reports: &[u32], _positive_reports_today: u32, ) -> bool { false } } /// Model data as multivariate normal distribution pub struct NormalAnalyzer { max_threshold: u32, scaling_factor: f64, } impl NormalAnalyzer { pub fn new(max_threshold: u32, scaling_factor: f64) -> Self { Self { max_threshold, scaling_factor, } } fn mean(data: &[u32]) -> f64 { let mut sum = 0.0; for count in data { sum += *count as f64; } sum / data.len() as f64 } fn std_dev(data: &[u32], mean: f64) -> f64 { let mut sum = 0.0; for count in data { sum += (*count as f64 - mean).powi(2); } (sum / data.len() as f64).sqrt() } fn mean_and_std_dev(data: &[u32]) -> (f64, f64) { let mean = Self::mean(data); let std = Self::std_dev(data, mean); (mean, std) } // Returns the mean vector, vector of individual standard deviations, and // covariance matrix. If the standard deviation for a variable is 0 and/or // the covariance matrix is not positive definite, add some noise to the // data and recompute. fn stats(data: &[&[u32]]) -> (Vec, Vec, Vec) { let n = data.len(); // Compute mean and standard deviation vectors let (mean_vec, sd_vec) = { let mut mean_vec = Vec::::new(); let mut sd_vec = Vec::::new(); for var in data { // Compute mean let mut sum = 0.0; for count in *var { sum += *count as f64; } let mean = sum / var.len() as f64; // Compute standard deviation let mut sum = 0.0; for count in *var { sum += (*count as f64 - mean).powi(2); } let sd = (sum / var.len() as f64).sqrt(); mean_vec.push(mean); sd_vec.push(sd); } (mean_vec, sd_vec) }; // Compute covariance matrix let cov_mat = { let mut cov_mat = Vec::::new(); // We don't need to recompute Syx, but we currently do for i in 0..n { for j in 0..n { cov_mat.push({ let var1 = data[i]; let var1_mean = mean_vec[i]; let var2 = data[j]; let var2_mean = mean_vec[j]; assert_eq!(var1.len(), var2.len()); let mut sum = 0.0; for index in 0..var1.len() { sum += (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean); } sum / (var1.len() - 1) as f64 }); } } cov_mat }; // If any standard deviation is 0 or the covariance matrix is not // positive definite, add some noise and recompute. let mut recompute = false; for sd in &sd_vec { if *sd <= 0.0 { recompute = true; } } if Cholesky::new(DMatrix::from_vec(n, n, cov_mat.clone())).is_none() { recompute = true; } if !recompute { (mean_vec, sd_vec, cov_mat) } else { // Add random noise and recompute let mut new_data = vec![vec![0; data[0].len()]; n]; let mut rng = rand::thread_rng(); for i in 0..n { for j in 0..data[i].len() { // Add 1 to some randomly selected values new_data[i][j] = data[i][j] + rng.gen_range(0..=1); } } // Compute stats on modified data Self::stats(&new_data.iter().map(Vec::as_slice).collect::>()) } } } impl Analyzer for NormalAnalyzer { /// Evaluate open-entry bridge based on only today's data fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], bridge_ips_today: u32, _negative_reports: &[u32], negative_reports_today: u32, ) -> bool { negative_reports_today > self.max_threshold || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today) } /// Evaluate invite-only bridge based on historical data fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); let alpha = 1.0 - confidence; let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips); let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports); // Model each variable with a normal distribution. let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd); let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); // If we have 0 standard deviation, we need another way to // evaluate each variable let bip_test = if bridge_ips_sd > 0.0 { bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha } else { // Consider the bridge blocked if its usage dropped by more // than 1 bin. (Note that the mean is the exact value // because we had no deviation.) (bridge_ips_today as f64) < bridge_ips_mean - 8.0 }; let nr_test = if negative_reports_sd > 0.0 { // We use CCDF because more negative reports is worse. (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha } else { // Consider the bridge blocked negative reports increase by // more than 1 after a long static period. (Note that the // mean is the exact value because we had no deviation.) (negative_reports_today as f64) > negative_reports_mean + 1.0 }; // Return true if any test concluded the bridge is blocked bip_test || nr_test } /// Evaluate invite-only bridge with lv3+ users submitting positive reports fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); assert_eq!(bridge_ips.len(), positive_reports.len()); let alpha = 1.0 - confidence; let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports, positive_reports]); let bridge_ips_mean = mean_vec[0]; let negative_reports_mean = mean_vec[1]; let positive_reports_mean = mean_vec[2]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; let positive_reports_sd = sd_vec[2]; /* let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); let pdf = mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, positive_reports_today as f64, ])); */ // Model each variable in isolation. We use the CCDF for // negative reports because more negative reports is worse. let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap(); let bip_cdf = bip_normal.cdf(bridge_ips_today as f64); let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap(); let nr_ccdf = 1.0 - nr_normal.cdf(negative_reports_today as f64); let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap(); let pr_cdf = pr_normal.cdf(positive_reports_today as f64); // For now, just look at each variable in isolation // TODO: How do we do a multivariate normal CDF? bip_cdf < alpha || nr_ccdf < alpha || pr_cdf < alpha } }