Model both accessible bridge and blocked bridge in analysis
This commit is contained in:
parent
b2eb244757
commit
31b27a291a
110
src/analysis.rs
110
src/analysis.rs
|
@ -1,7 +1,7 @@
|
|||
use crate::{BridgeInfo, BridgeInfoType};
|
||||
use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
|
||||
use nalgebra::DVector;
|
||||
use statrs::distribution::{Continuous, MultivariateNormal};
|
||||
use statrs::distribution::{Continuous, MultivariateNormal, Normal};
|
||||
use std::{
|
||||
cmp::min,
|
||||
collections::{BTreeMap, HashSet},
|
||||
|
@ -302,22 +302,52 @@ impl Analyzer for NormalAnalyzer {
|
|||
let alpha = 1.0 - confidence;
|
||||
|
||||
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
|
||||
let bridge_ips_mean = mean_vec[0];
|
||||
let negative_reports_mean = mean_vec[1];
|
||||
let bridge_ips_sd = sd_vec[0];
|
||||
let negative_reports_sd = sd_vec[1];
|
||||
|
||||
// Artificially create data for alternative hypothesis
|
||||
let num_days = bridge_ips.len() as usize;
|
||||
let mut bridge_ips_blocked = vec![0; num_days];
|
||||
let mut negative_reports_blocked = vec![0; num_days];
|
||||
let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
|
||||
for i in 0..num_days {
|
||||
// Suppose bridge stats will go down by 2 SDs
|
||||
bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
|
||||
0
|
||||
} else {
|
||||
bridge_ips[i] - bridge_ips_deviation
|
||||
};
|
||||
// Suppose negative reports will go up by 2 SDs
|
||||
negative_reports_blocked[i] =
|
||||
negative_reports[i] + (2.0 * negative_reports_sd).round() as u32;
|
||||
}
|
||||
let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) =
|
||||
Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]);
|
||||
|
||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
||||
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
||||
bridge_ips_today as f64,
|
||||
negative_reports_today as f64,
|
||||
]));
|
||||
if pdf < alpha {
|
||||
(negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
|
||||
|| (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
|
||||
} else {
|
||||
false
|
||||
}
|
||||
|
||||
let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
|
||||
let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
|
||||
bridge_ips_today as f64,
|
||||
negative_reports_today as f64,
|
||||
]));
|
||||
|
||||
// Also model negative reports in isolation
|
||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||
let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
|
||||
let nr_normal_blocked = Normal::new(
|
||||
negative_reports_mean + 2.0 * negative_reports_sd,
|
||||
negative_reports_sd,
|
||||
)
|
||||
.unwrap();
|
||||
let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
|
||||
|
||||
(pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
|
||||
}
|
||||
|
||||
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
||||
|
@ -339,25 +369,67 @@ impl Analyzer for NormalAnalyzer {
|
|||
|
||||
let (mean_vec, sd_vec, cov_mat) =
|
||||
Self::stats(&[bridge_ips, negative_reports, positive_reports]);
|
||||
let bridge_ips_mean = mean_vec[0];
|
||||
let negative_reports_mean = mean_vec[1];
|
||||
let positive_reports_mean = mean_vec[2];
|
||||
let bridge_ips_sd = sd_vec[0];
|
||||
let negative_reports_sd = sd_vec[1];
|
||||
let positive_reports_sd = sd_vec[2];
|
||||
|
||||
// Artificially create data for alternative hypothesis
|
||||
let num_days = bridge_ips.len() as usize;
|
||||
let mut bridge_ips_blocked = vec![0; num_days];
|
||||
let mut negative_reports_blocked = vec![0; num_days];
|
||||
let mut positive_reports_blocked = vec![0; num_days];
|
||||
let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
|
||||
let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32;
|
||||
for i in 0..num_days {
|
||||
// Suppose positive reports will go down by 2 SDs
|
||||
positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] {
|
||||
0
|
||||
} else {
|
||||
positive_reports[i] - positive_reports_deviation
|
||||
};
|
||||
// Suppose bridge stats will go down by 2 SDs
|
||||
bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
|
||||
0
|
||||
} else {
|
||||
bridge_ips[i] - bridge_ips_deviation
|
||||
};
|
||||
// Suppose each user who would have submitted a positive report but
|
||||
// didn't submits a negative report instead.
|
||||
negative_reports_blocked[i] =
|
||||
negative_reports[i] + positive_reports[i] - positive_reports_blocked[i];
|
||||
}
|
||||
let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[
|
||||
&bridge_ips_blocked,
|
||||
&negative_reports_blocked,
|
||||
&positive_reports_blocked,
|
||||
]);
|
||||
|
||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
||||
if mvn.pdf(&DVector::from_vec(vec![
|
||||
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
||||
bridge_ips_today as f64,
|
||||
negative_reports_today as f64,
|
||||
positive_reports_today as f64,
|
||||
])) < alpha
|
||||
{
|
||||
(negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
|
||||
|| (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
|
||||
|| (positive_reports_today as f64) < positive_reports_mean - positive_reports_sd
|
||||
} else {
|
||||
false
|
||||
}
|
||||
]));
|
||||
|
||||
let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
|
||||
let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
|
||||
bridge_ips_today as f64,
|
||||
negative_reports_today as f64,
|
||||
positive_reports_today as f64,
|
||||
]));
|
||||
|
||||
// Also model negative reports in isolation
|
||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||
let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
|
||||
// Note we do NOT make this a function of positive signals
|
||||
let nr_normal_blocked = Normal::new(
|
||||
negative_reports_mean + 2.0 * negative_reports_sd,
|
||||
negative_reports_sd,
|
||||
)
|
||||
.unwrap();
|
||||
let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
|
||||
|
||||
(pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
|
||||
}
|
||||
}
|
||||
|
|
205
src/tests.rs
205
src/tests.rs
|
@ -1119,7 +1119,210 @@ fn test_analysis() {
|
|||
);
|
||||
}
|
||||
|
||||
// TODO: More tests
|
||||
{
|
||||
let mut date = get_date();
|
||||
|
||||
// New bridge info
|
||||
let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
|
||||
|
||||
bridge_info
|
||||
.info_by_country
|
||||
.insert("ru".to_string(), BridgeCountryInfo::new());
|
||||
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
|
||||
let confidence = 0.95;
|
||||
|
||||
let mut blocking_countries = HashSet::<String>::new();
|
||||
|
||||
// No data today
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
|
||||
for i in 1..30 {
|
||||
// 16-32 connections, 0-3 negative reports each day
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
8 * (i % 3 + 2),
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
i % 4,
|
||||
);
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
}
|
||||
|
||||
// Data similar to previous days:
|
||||
// 24 connections, 2 negative reports
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
24,
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
2,
|
||||
);
|
||||
|
||||
// Should not be blocked because we have similar data.
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
|
||||
// Data different from previous days:
|
||||
// 104 connections, 1 negative report
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
104,
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
1,
|
||||
);
|
||||
|
||||
// This should not be blocked even though it's very different because
|
||||
// it's different in the good direction.
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
|
||||
// Data different from previous days:
|
||||
// 800 connections, 12 negative reports
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
800,
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
12,
|
||||
);
|
||||
blocking_countries.insert("ru".to_string());
|
||||
|
||||
// The censor artificially inflated bridge stats to prevent detection.
|
||||
// Ensure we still detect the censorship from negative reports.
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
let mut date = get_date();
|
||||
|
||||
// New bridge info
|
||||
let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
|
||||
|
||||
bridge_info
|
||||
.info_by_country
|
||||
.insert("ru".to_string(), BridgeCountryInfo::new());
|
||||
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
|
||||
let confidence = 0.95;
|
||||
|
||||
let mut blocking_countries = HashSet::<String>::new();
|
||||
|
||||
// No data today
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
|
||||
for i in 1..30 {
|
||||
// 16-32 connections, 0-3 negative reports each day
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
8 * (i % 3 + 2),
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
i % 4,
|
||||
);
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
}
|
||||
|
||||
// Data similar to previous days:
|
||||
// 24 connections, 2 negative reports
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
24,
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
2,
|
||||
);
|
||||
|
||||
// Should not be blocked because we have similar data.
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
|
||||
// Data different from previous days:
|
||||
// 104 connections, 1 negative report
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
104,
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
1,
|
||||
);
|
||||
|
||||
// This should not be blocked even though it's very different because
|
||||
// it's different in the good direction.
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
|
||||
// Data different from previous days:
|
||||
// 0 connections, 0 negative reports
|
||||
date += 1;
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::BridgeIps,
|
||||
date,
|
||||
0,
|
||||
);
|
||||
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
|
||||
BridgeInfoType::NegativeReports,
|
||||
date,
|
||||
0,
|
||||
);
|
||||
blocking_countries.insert("ru".to_string());
|
||||
|
||||
// This should be blocked because it's different in the bad direction.
|
||||
assert_eq!(
|
||||
blocked_in(&analyzer, &bridge_info, confidence, date),
|
||||
blocking_countries
|
||||
);
|
||||
}
|
||||
|
||||
// TODO: Test stage 3 analysis
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue