Compare commits

..

4 Commits

Author SHA1 Message Date
Vecna e889cba878 Remove info print statements 2024-04-19 01:34:24 -04:00
Vecna f0133ce0dc The updater can just shut down with the main function 2024-04-18 23:50:37 -04:00
Vecna 851686cf94 Mark unused variable bridge_ips in stage one analysis 2024-04-18 22:42:27 -04:00
Vecna 36395181d3 Improve analysis
We seem to get better results when we scale bridge IPs down to multiples of 1 instead of 8 and only mark bridges as blocked if they differ in the 'bad' direction by at least one standard deviation from the mean.
2024-04-18 22:27:57 -04:00
3 changed files with 156 additions and 35 deletions

View File

@ -7,6 +7,8 @@ use std::{
collections::{BTreeMap, HashSet}, collections::{BTreeMap, HashSet},
}; };
const SCALE_BRIDGE_IPS: u32 = 8;
/// Provides a function for predicting which countries block this bridge /// Provides a function for predicting which countries block this bridge
pub trait Analyzer { pub trait Analyzer {
/// Evaluate open-entry bridge. Returns true if blocked, false otherwise. /// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
@ -69,15 +71,15 @@ pub fn blocked_in(
None => &new_map_binding, None => &new_map_binding,
}; };
let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) { let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
Some(v) => *v, Some(&v) => v / SCALE_BRIDGE_IPS,
None => 0, None => 0,
}; };
let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) { let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) {
Some(v) => *v, Some(&v) => v,
None => 0, None => 0,
}; };
let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) { let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) {
Some(v) => *v, Some(&v) => v,
None => 0, None => 0,
}; };
@ -96,7 +98,7 @@ pub fn blocked_in(
None => &new_map_binding, None => &new_map_binding,
}; };
bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) { bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
Some(&v) => v, Some(&v) => v / SCALE_BRIDGE_IPS,
None => 0, None => 0,
}; };
negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports) negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports)
@ -213,22 +215,33 @@ impl NormalAnalyzer {
} }
} }
fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) { // Returns the mean vector, vector of individual standard deviations, and
// covariance matrix
fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>, Vec<f64>) {
let n = data.len(); let n = data.len();
// Compute mean vector // Compute mean and standard deviation vectors
let mean_vec = { let (mean_vec, sd_vec) = {
let mut mean_vec = Vec::<f64>::new(); let mut mean_vec = Vec::<f64>::new();
let mut sd_vec = Vec::<f64>::new();
for var in data { for var in data {
mean_vec.push({ // Compute mean
let mut sum = 0.0; let mut sum = 0.0;
for count in *var { for count in *var {
sum += *count as f64; sum += *count as f64;
} }
sum / var.len() as f64 let mean = sum / var.len() as f64;
});
// Compute standard deviation
let mut sum = 0.0;
for count in *var {
sum += (*count as f64 - mean).powi(2);
}
let sd = (sum / var.len() as f64).sqrt();
mean_vec.push(mean);
sd_vec.push(sd);
} }
mean_vec (mean_vec, sd_vec)
}; };
// Compute covariance matrix // Compute covariance matrix
@ -258,7 +271,7 @@ impl NormalAnalyzer {
cov_mat cov_mat
}; };
(mean_vec, cov_mat) (mean_vec, sd_vec, cov_mat)
} }
} }
@ -273,7 +286,8 @@ impl Analyzer for NormalAnalyzer {
negative_reports_today: u32, negative_reports_today: u32,
) -> bool { ) -> bool {
negative_reports_today > self.max_threshold negative_reports_today > self.max_threshold
|| f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today) || f64::from(negative_reports_today)
> self.scaling_factor * f64::from(bridge_ips_today) * SCALE_BRIDGE_IPS as f64
} }
/// Evaluate invite-only bridge based on last 30 days /// Evaluate invite-only bridge based on last 30 days
@ -288,19 +302,22 @@ impl Analyzer for NormalAnalyzer {
assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
assert_eq!(bridge_ips.len(), negative_reports.len()); assert_eq!(bridge_ips.len(), negative_reports.len());
let (mean_vec, cov_mat) = let alpha = 1.0 - confidence;
Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
let bridge_ips_mean = mean_vec[0]; let bridge_ips_mean = mean_vec[0];
let negative_reports_mean = mean_vec[1]; let negative_reports_mean = mean_vec[1];
let bridge_ips_sd = sd_vec[0];
let negative_reports_sd = sd_vec[1];
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
if mvn.pdf(&DVector::from_vec(vec![ if mvn.pdf(&DVector::from_vec(vec![
bridge_ips_today as f64, bridge_ips_today as f64,
negative_reports_today as f64, negative_reports_today as f64,
])) < confidence ])) < alpha
{ {
(negative_reports_today as f64) > negative_reports_mean (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
|| (bridge_ips_today as f64) < bridge_ips_mean || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
} else { } else {
false false
} }
@ -321,25 +338,27 @@ impl Analyzer for NormalAnalyzer {
assert_eq!(bridge_ips.len(), negative_reports.len()); assert_eq!(bridge_ips.len(), negative_reports.len());
assert_eq!(bridge_ips.len(), positive_reports.len()); assert_eq!(bridge_ips.len(), positive_reports.len());
let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[ let alpha = 1.0 - confidence;
bridge_ips,
negative_reports, let (mean_vec, sd_vec, cov_mat) =
positive_reports, Self::stats(&[bridge_ips, negative_reports, positive_reports]);
]);
let bridge_ips_mean = mean_vec[0]; let bridge_ips_mean = mean_vec[0];
let negative_reports_mean = mean_vec[1]; let negative_reports_mean = mean_vec[1];
let positive_reports_mean = mean_vec[2]; let positive_reports_mean = mean_vec[2];
let bridge_ips_sd = sd_vec[0];
let negative_reports_sd = sd_vec[1];
let positive_reports_sd = sd_vec[2];
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap(); let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
if mvn.pdf(&DVector::from_vec(vec![ if mvn.pdf(&DVector::from_vec(vec![
bridge_ips_today as f64, bridge_ips_today as f64,
negative_reports_today as f64, negative_reports_today as f64,
positive_reports_today as f64, positive_reports_today as f64,
])) < confidence ])) < alpha
{ {
(negative_reports_today as f64) > negative_reports_mean (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
|| (bridge_ips_today as f64) < bridge_ips_mean || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
|| (positive_reports_today as f64) < positive_reports_mean || (positive_reports_today as f64) < positive_reports_mean - positive_reports_sd
} else { } else {
false false
} }

View File

@ -183,11 +183,9 @@ async fn main() {
let updater_tx = request_tx.clone(); let updater_tx = request_tx.clone();
let shutdown_cmd_tx = request_tx.clone(); let shutdown_cmd_tx = request_tx.clone();
// create the shutdown broadcast channel and clone for every thread // create the shutdown broadcast channel
let (shutdown_tx, mut shutdown_rx) = broadcast::channel(16); let (shutdown_tx, mut shutdown_rx) = broadcast::channel(16);
let kill = shutdown_tx.subscribe(); let kill = shutdown_tx.subscribe();
// TODO: Gracefully shut down updater
let kill_updater = shutdown_tx.subscribe();
// Listen for ctrl_c, send signal to broadcast shutdown to all threads by dropping shutdown_tx // Listen for ctrl_c, send signal to broadcast shutdown to all threads by dropping shutdown_tx
let shutdown_handler = spawn(async move { let shutdown_handler = spawn(async move {

View File

@ -1015,7 +1015,111 @@ fn test_analysis() {
); );
} }
// TODO: Test stage 2 analysis // Test stage 2 analysis
{
let mut date = get_date();
// New bridge info
let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
let mut blocking_countries = HashSet::<String>::new();
// No data today
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
for i in 1..30 {
// 9-32 connections, 0-3 negative reports each day
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
8 * (i % 3 + 2),
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
i % 4,
);
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
// Data similar to previous days:
// 24 connections, 2 negative reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
24,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
2,
);
// Should not be blocked because we have similar data.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 104 connections, 1 negative report
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
104,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
1,
);
// This should not be blocked even though it's very different because
// it's different in the good direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 40 connections, 12 negative reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
40,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
12,
);
blocking_countries.insert("ru".to_string());
// This should be blocked because it's different in the bad direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
// TODO: More tests
// TODO: Test stage 3 analysis // TODO: Test stage 3 analysis
} }