diff --git a/src/analysis.rs b/src/analysis.rs index 33dd1ca..031b942 100644 --- a/src/analysis.rs +++ b/src/analysis.rs @@ -1,7 +1,7 @@ use crate::{BridgeInfo, BridgeInfoType}; use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL}; use nalgebra::DVector; -use statrs::distribution::{Continuous, MultivariateNormal}; +use statrs::distribution::{Continuous, MultivariateNormal, Normal}; use std::{ cmp::min, collections::{BTreeMap, HashSet}, @@ -302,22 +302,52 @@ impl Analyzer for NormalAnalyzer { let alpha = 1.0 - confidence; let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]); - let bridge_ips_mean = mean_vec[0]; let negative_reports_mean = mean_vec[1]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; + // Artificially create data for alternative hypothesis + let num_days = bridge_ips.len() as usize; + let mut bridge_ips_blocked = vec![0; num_days]; + let mut negative_reports_blocked = vec![0; num_days]; + let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32; + for i in 0..num_days { + // Suppose bridge stats will go down by 2 SDs + bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] { + 0 + } else { + bridge_ips[i] - bridge_ips_deviation + }; + // Suppose negative reports will go up by 2 SDs + negative_reports_blocked[i] = + negative_reports[i] + (2.0 * negative_reports_sd).round() as u32; + } + let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = + Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]); + let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); let pdf = mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, ])); - if pdf < alpha { - (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd - || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd - } else { - false - } + + let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap(); + let 
pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![ + bridge_ips_today as f64, + negative_reports_today as f64, + ])); + + // Also model negative reports in isolation + let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap(); + let nr_pdf = nr_normal.pdf(negative_reports_today as f64); + let nr_normal_blocked = Normal::new( + negative_reports_mean + 2.0 * negative_reports_sd, + negative_reports_sd, + ) + .unwrap(); + let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64); + + (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha } /// Evaluate invite-only bridge with lv3+ users submitting positive reports @@ -339,25 +369,67 @@ impl Analyzer for NormalAnalyzer { let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports, positive_reports]); - let bridge_ips_mean = mean_vec[0]; let negative_reports_mean = mean_vec[1]; - let positive_reports_mean = mean_vec[2]; let bridge_ips_sd = sd_vec[0]; let negative_reports_sd = sd_vec[1]; let positive_reports_sd = sd_vec[2]; + // Artificially create data for alternative hypothesis + let num_days = bridge_ips.len() as usize; + let mut bridge_ips_blocked = vec![0; num_days]; + let mut negative_reports_blocked = vec![0; num_days]; + let mut positive_reports_blocked = vec![0; num_days]; + let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32; + let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32; + for i in 0..num_days { + // Suppose positive reports will go down by 2 SDs + positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] { + 0 + } else { + positive_reports[i] - positive_reports_deviation + }; + // Suppose bridge stats will go down by 2 SDs + bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] { + 0 + } else { + bridge_ips[i] - bridge_ips_deviation + }; + // Suppose each user who would have submitted a positive report but + // didn't submits a negative report instead. 
+ negative_reports_blocked[i] = + negative_reports[i] + positive_reports[i] - positive_reports_blocked[i]; + } + let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[ + &bridge_ips_blocked, + &negative_reports_blocked, + &positive_reports_blocked, + ]); + let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); - if mvn.pdf(&DVector::from_vec(vec![ + let pdf = mvn.pdf(&DVector::from_vec(vec![ bridge_ips_today as f64, negative_reports_today as f64, positive_reports_today as f64, - ])) < alpha - { - (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd - || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd - || (positive_reports_today as f64) < positive_reports_mean - positive_reports_sd - } else { - false - } + ])); + + let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap(); + let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![ + bridge_ips_today as f64, + negative_reports_today as f64, + positive_reports_today as f64, + ])); + + // Also model negative reports in isolation + let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap(); + let nr_pdf = nr_normal.pdf(negative_reports_today as f64); + // Note we do NOT make this a function of positive signals + let nr_normal_blocked = Normal::new( + negative_reports_mean + 2.0 * negative_reports_sd, + negative_reports_sd, + ) + .unwrap(); + let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64); + + (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha } } diff --git a/src/tests.rs b/src/tests.rs index de7d4c2..6a93ac8 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1119,7 +1119,210 @@ fn test_analysis() { ); } - // TODO: More tests + { + let mut date = get_date(); + + // New bridge info + let mut bridge_info = BridgeInfo::new([0; 20], &String::default()); + + bridge_info + .info_by_country + .insert("ru".to_string(), BridgeCountryInfo::new()); + let analyzer = 
+ analysis::NormalAnalyzer::new(5, 0.25); + let confidence = 0.95; + + let mut blocking_countries = HashSet::<String>::new(); + + // No data today + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + for i in 1..30 { + // 9-32 connections, 0-3 negative reports each day + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 8 * (i % 3 + 2), + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + i % 4, + ); + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + // Data similar to previous days: + // 24 connections, 2 negative reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 24, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 2, + ); + + // Should not be blocked because we have similar data. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 104 connections, 1 negative report + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 104, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 1, + ); + + // This should not be blocked even though it's very different because + // it's different in the good direction. 
+ assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 800 connections, 12 negative reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 800, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 12, + ); + blocking_countries.insert("ru".to_string()); + + // The censor artificially inflated bridge stats to prevent detection. + // Ensure we still detect the censorship from negative reports. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + { + let mut date = get_date(); + + // New bridge info + let mut bridge_info = BridgeInfo::new([0; 20], &String::default()); + + bridge_info + .info_by_country + .insert("ru".to_string(), BridgeCountryInfo::new()); + let analyzer = analysis::NormalAnalyzer::new(5, 0.25); + let confidence = 0.95; + + let mut blocking_countries = HashSet::<String>::new(); + + // No data today + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + for i in 1..30 { + // 9-32 connections, 0-3 negative reports each day + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 8 * (i % 3 + 2), + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + i % 4, + ); + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + // Data similar to previous days: + // 24 connections, 2 negative reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 24, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 2, + ); + + // Should not be blocked because we have similar data. 
+ assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 104 connections, 1 negative report + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 104, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 1, + ); + + // This should not be blocked even though it's very different because + // it's different in the good direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 0 connections, 0 negative reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 0, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 0, + ); + blocking_countries.insert("ru".to_string()); + + // This should be blocked because it's different in the bad direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } // TODO: Test stage 3 analysis }