Start stage 3 based on actual observation of a positive report

Also track how much historical data we have on a per-country basis
This commit is contained in:
Vecna 2024-04-27 13:20:05 -04:00
parent eef9e5164e
commit e2b1f9aa7d
3 changed files with 407 additions and 22 deletions

View File

@ -54,8 +54,8 @@ pub fn blocked_in(
) -> HashSet<String> {
let mut blocked_in = HashSet::<String>::new();
let today = date;
let age = today - bridge_info.first_seen;
for (country, info) in &bridge_info.info_by_country {
let age = today - info.first_seen;
if info.blocked {
// Assume bridges never become unblocked
blocked_in.insert(country.to_string());
@ -122,8 +122,7 @@ pub fn blocked_in(
) {
blocked_in.insert(country.to_string());
}
} else if age
< UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL
} else if info.first_pr.is_none() || today < info.first_pr.unwrap() + UNTRUSTED_INTERVAL
{
// invite-only bridge without 30+ days of historical data on
// positive reports

View File

@ -57,9 +57,6 @@ pub struct BridgeInfo {
/// nickname of bridge (probably not necessary)
pub nickname: String,
/// first Julian date we started collecting data on this bridge
pub first_seen: u32,
/// map of countries to data for this bridge in that country
pub info_by_country: HashMap<String, BridgeCountryInfo>,
}
@ -69,7 +66,6 @@ impl BridgeInfo {
Self {
fingerprint: fingerprint,
nickname: nickname.to_string(),
first_seen: get_date(),
info_by_country: HashMap::<String, BridgeCountryInfo>::new(),
}
}
@ -82,7 +78,7 @@ impl fmt::Display for BridgeInfo {
array_bytes::bytes2hex("", self.fingerprint).as_str()
);
str.push_str(format!("nickname: {}\n", self.nickname).as_str());
str.push_str(format!("first_seen: {}\n", self.first_seen).as_str());
//str.push_str(format!("first_seen: {}\n", self.first_seen).as_str());
str.push_str("info_by_country:");
for country in self.info_by_country.keys() {
str.push_str(format!("\n country: {}", country).as_str());
@ -107,13 +103,21 @@ pub enum BridgeInfoType {
/// Data collected about one bridge within a single country.
pub struct BridgeCountryInfo {
/// per-date data: maps a date to the count recorded for each info type on that date
pub info_by_day: BTreeMap<u32, BTreeMap<BridgeInfoType, u32>>,
/// whether this bridge has been marked as blocked in this country
pub blocked: bool,
/// first Julian date we saw data from this country for this bridge
pub first_seen: u32,
/// first Julian date we saw a positive report from this country for this bridge
pub first_pr: Option<u32>,
}
impl BridgeCountryInfo {
pub fn new() -> Self {
pub fn new(first_seen: u32) -> Self {
Self {
info_by_day: BTreeMap::<u32, BTreeMap<BridgeInfoType, u32>>::new(),
blocked: false,
first_seen: first_seen,
first_pr: None,
}
}
@ -137,12 +141,25 @@ impl BridgeCountryInfo {
info.insert(info_type, count);
self.info_by_day.insert(date, info);
}
// If this is the first instance of positive reports, save the date
if self.first_pr.is_none() && info_type == BridgeInfoType::PositiveReports && count > 0 {
self.first_pr = Some(date);
}
}
}
impl fmt::Display for BridgeCountryInfo {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut str = String::from("info:");
let mut str = format!("blocked: {}\n", self.blocked);
str.push_str(format!("first seen: {}\n", self.first_seen).as_str());
let first_pr = if self.first_pr.is_none() {
"never".to_string()
} else {
self.first_pr.unwrap().to_string()
};
str.push_str(format!("first positive report observed: {}\n", first_pr).as_str());
str.push_str("info:");
for date in self.info_by_day.keys() {
let info = self.info_by_day.get(date).unwrap();
let ip_count = match info.get(&BridgeInfoType::BridgeIps) {
@ -239,7 +256,7 @@ pub fn add_extra_info_to_db(db: &Db, extra_info: ExtraInfo) {
);
} else {
// No existing entry; make a new one.
let mut bridge_country_info = BridgeCountryInfo::new();
let mut bridge_country_info = BridgeCountryInfo::new(extra_info.date);
bridge_country_info.add_info(
BridgeInfoType::BridgeIps,
extra_info.date,
@ -444,7 +461,7 @@ pub async fn verify_negative_reports(
/// Process today's negative reports and store the count of verified reports in
/// the database.
pub async fn update_negative_reports(db: &Db, distributors: &BTreeMap<BridgeDistributor, String>) {
let mut all_negative_reports = match db.get("nrs-to-process").unwrap() {
let all_negative_reports = match db.get("nrs-to-process").unwrap() {
Some(v) => bincode::deserialize(&v).unwrap(),
None => BTreeMap::<String, Vec<SerializableNegativeReport>>::new(),
};
@ -475,7 +492,7 @@ pub async fn update_negative_reports(db: &Db, distributors: &BTreeMap<BridgeDist
bridge_country_info.add_info(BridgeInfoType::NegativeReports, date, count_valid);
} else {
// No existing entry; make a new one.
let mut bridge_country_info = BridgeCountryInfo::new();
let mut bridge_country_info = BridgeCountryInfo::new(date);
bridge_country_info.add_info(BridgeInfoType::NegativeReports, date, count_valid);
bridge_info
.info_by_country
@ -557,7 +574,7 @@ pub async fn verify_positive_reports(
/// Process today's positive reports and store the count of verified reports in
/// the database.
pub async fn update_positive_reports(db: &Db, distributors: &BTreeMap<BridgeDistributor, String>) {
let mut all_positive_reports = match db.get("prs-to-process").unwrap() {
let all_positive_reports = match db.get("prs-to-process").unwrap() {
Some(v) => bincode::deserialize(&v).unwrap(),
None => BTreeMap::<String, Vec<SerializablePositiveReport>>::new(),
};
@ -588,7 +605,7 @@ pub async fn update_positive_reports(db: &Db, distributors: &BTreeMap<BridgeDist
bridge_country_info.add_info(BridgeInfoType::PositiveReports, date, count_valid);
} else {
// No existing entry; make a new one.
let mut bridge_country_info = BridgeCountryInfo::new();
let mut bridge_country_info = BridgeCountryInfo::new(date);
bridge_country_info.add_info(BridgeInfoType::PositiveReports, date, count_valid);
bridge_info
.info_by_country

View File

@ -840,7 +840,7 @@ fn test_analysis() {
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
@ -899,7 +899,7 @@ fn test_analysis() {
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
@ -973,7 +973,7 @@ fn test_analysis() {
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
@ -1025,7 +1025,7 @@ fn test_analysis() {
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
@ -1127,7 +1127,7 @@ fn test_analysis() {
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
@ -1230,7 +1230,7 @@ fn test_analysis() {
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new());
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
@ -1324,5 +1324,374 @@ fn test_analysis() {
);
}
// TODO: Test stage 3 analysis
// Test stage 3 analysis
{
let mut date = get_date();
// New bridge info
let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
let mut blocking_countries = HashSet::<String>::new();
// No data today
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
for i in 1..30 {
// 16-32 connections, 0-3 negative reports, 16-20 positive reports each day
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
8 * (i % 3 + 2),
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
i % 4,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
16 + i % 5,
);
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
// Data similar to previous days:
// 24 connections, 2 negative reports, 17 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
24,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
2,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
17,
);
// Should not be blocked because we have similar data.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 104 connections, 1 negative report, 100 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
104,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
1,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
100,
);
// This should not be blocked even though it's very different because
// it's different in the good direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 40 connections, 12 negative reports, 40 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
40,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
12,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
40,
);
blocking_countries.insert("ru".to_string());
// This should be blocked because it's different in the bad direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
{
let mut date = get_date();
// New bridge info
let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
let mut blocking_countries = HashSet::<String>::new();
// No data today
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
for i in 1..30 {
// 16-32 connections, 0-3 negative reports, 16-20 positive reports each day
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
8 * (i % 3 + 2),
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
i % 4,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
16 + i % 5,
);
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
// Data similar to previous days:
// 24 connections, 2 negative reports, 17 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
24,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
2,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
17,
);
// Should not be blocked because we have similar data.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 104 connections, 1 negative report, 85 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
104,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
1,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
85,
);
// This should not be blocked even though it's very different because
// it's different in the good direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 800 connections, 12 negative reports, 750 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
800,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
12,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
750,
);
blocking_countries.insert("ru".to_string());
// The censor artificially inflated bridge stats to prevent detection.
// Ensure we still detect the censorship from negative reports.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
{
let mut date = get_date();
// New bridge info
let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
bridge_info
.info_by_country
.insert("ru".to_string(), BridgeCountryInfo::new(date));
let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
let confidence = 0.95;
let mut blocking_countries = HashSet::<String>::new();
// No data today
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
for i in 1..30 {
// 16-32 connections, 0-3 negative reports, 16-20 positive reports each day
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
8 * (i % 3 + 2),
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
i % 4,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
16 + i % 5,
);
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
// Data similar to previous days:
// 24 connections, 2 negative reports, 17 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
24,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
2,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
17,
);
// Should not be blocked because we have similar data.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 104 connections, 1 negative report, 100 positive reports
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
104,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
1,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
100,
);
// This should not be blocked even though it's very different because
// it's different in the good direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
// Data different from previous days:
// 24 connections, 1 negative report, 1 positive report
date += 1;
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::BridgeIps,
date,
24,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::NegativeReports,
date,
1,
);
bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
BridgeInfoType::PositiveReports,
date,
1,
);
blocking_countries.insert("ru".to_string());
// This should be blocked because it's different in the bad direction.
assert_eq!(
blocked_in(&analyzer, &bridge_info, confidence, date),
blocking_countries
);
}
}