diff --git a/src/analysis.rs b/src/analysis.rs index cb49a4f..1e53661 100644 --- a/src/analysis.rs +++ b/src/analysis.rs @@ -54,8 +54,8 @@ pub fn blocked_in( ) -> HashSet { let mut blocked_in = HashSet::::new(); let today = date; - let age = today - bridge_info.first_seen; for (country, info) in &bridge_info.info_by_country { + let age = today - info.first_seen; if info.blocked { // Assume bridges never become unblocked blocked_in.insert(country.to_string()); @@ -122,8 +122,7 @@ pub fn blocked_in( ) { blocked_in.insert(country.to_string()); } - } else if age - < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL + } else if info.first_pr.is_none() || today < info.first_pr.unwrap() + UNTRUSTED_INTERVAL { // invite-only bridge without 30+ days of historical data on // positive reports diff --git a/src/lib.rs b/src/lib.rs index caf6fec..4bc14f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,9 +57,6 @@ pub struct BridgeInfo { /// nickname of bridge (probably not necessary) pub nickname: String, - /// first Julian date we started collecting data on this bridge - pub first_seen: u32, - /// map of countries to data for this bridge in that country pub info_by_country: HashMap, } @@ -69,7 +66,6 @@ impl BridgeInfo { Self { fingerprint: fingerprint, nickname: nickname.to_string(), - first_seen: get_date(), info_by_country: HashMap::::new(), } } @@ -82,7 +78,7 @@ impl fmt::Display for BridgeInfo { array_bytes::bytes2hex("", self.fingerprint).as_str() ); str.push_str(format!("nickname: {}\n", self.nickname).as_str()); - str.push_str(format!("first_seen: {}\n", self.first_seen).as_str()); + //str.push_str(format!("first_seen: {}\n", self.first_seen).as_str()); str.push_str("info_by_country:"); for country in self.info_by_country.keys() { str.push_str(format!("\n country: {}", country).as_str()); @@ -107,13 +103,21 @@ pub enum BridgeInfoType { pub struct BridgeCountryInfo { pub info_by_day: BTreeMap>, pub blocked: bool, + + /// first Julian date we saw data from this country for this bridge + pub first_seen: u32, + + /// first Julian date we saw a positive report from this country for this bridge + pub first_pr: Option, } impl BridgeCountryInfo { - pub fn new() -> Self { + pub fn new(first_seen: u32) -> Self { Self { info_by_day: BTreeMap::>::new(), blocked: false, + first_seen: first_seen, + first_pr: None, } } @@ -137,12 +141,25 @@ impl BridgeCountryInfo { info.insert(info_type, count); self.info_by_day.insert(date, info); } + + // If this is the first instance of positive reports, save the date + if self.first_pr.is_none() && info_type == BridgeInfoType::PositiveReports && count > 0 { + self.first_pr = Some(date); + } } } impl fmt::Display for BridgeCountryInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut str = String::from("info:"); + let mut str = format!("blocked: {}\n", self.blocked); + str.push_str(format!("first seen: {}\n", self.first_seen).as_str()); + let first_pr = if self.first_pr.is_none() { + "never".to_string() + } else { + self.first_pr.unwrap().to_string() + }; + str.push_str(format!("first positive report observed: {}\n", first_pr).as_str()); + str.push_str("info:"); for date in self.info_by_day.keys() { let info = self.info_by_day.get(date).unwrap(); let ip_count = match info.get(&BridgeInfoType::BridgeIps) { @@ -239,7 +256,7 @@ pub fn add_extra_info_to_db(db: &Db, extra_info: ExtraInfo) { ); } else { // No existing entry; make a new one. - let mut bridge_country_info = BridgeCountryInfo::new(); + let mut bridge_country_info = BridgeCountryInfo::new(extra_info.date); bridge_country_info.add_info( BridgeInfoType::BridgeIps, extra_info.date, @@ -444,7 +461,7 @@ pub async fn verify_negative_reports( /// Process today's negative reports and store the count of verified reports in /// the database. pub async fn update_negative_reports(db: &Db, distributors: &BTreeMap) { - let mut all_negative_reports = match db.get("nrs-to-process").unwrap() { + let all_negative_reports = match db.get("nrs-to-process").unwrap() { Some(v) => bincode::deserialize(&v).unwrap(), None => BTreeMap::>::new(), }; @@ -475,7 +492,7 @@ pub async fn update_negative_reports(db: &Db, distributors: &BTreeMap) { - let mut all_positive_reports = match db.get("prs-to-process").unwrap() { + let all_positive_reports = match db.get("prs-to-process").unwrap() { Some(v) => bincode::deserialize(&v).unwrap(), None => BTreeMap::>::new(), }; @@ -588,7 +605,7 @@ pub async fn update_positive_reports(db: &Db, distributors: &BTreeMap::new(); + + // No data today + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + for i in 1..30 { + // 9-32 connections, 0-3 negative reports, 16-20 positive reports each day + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 8 * (i % 3 + 2), + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + i % 4, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 16 + i % 5, + ); + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + // Data similar to previous days: + // 24 connections, 2 negative reports, 17 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 24, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 2, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 17, + ); + + // Should not be blocked because we have similar data. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 104 connections, 1 negative report, 100 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 104, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 1, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 100, + ); + + // This should not be blocked even though it's very different because + // it's different in the good direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 40 connections, 12 negative reports, 40 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 40, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 12, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 40, + ); + blocking_countries.insert("ru".to_string()); + + // This should be blocked because it's different in the bad direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + { + let mut date = get_date(); + + // New bridge info + let mut bridge_info = BridgeInfo::new([0; 20], &String::default()); + + bridge_info + .info_by_country + .insert("ru".to_string(), BridgeCountryInfo::new(date)); + let analyzer = analysis::NormalAnalyzer::new(5, 0.25); + let confidence = 0.95; + + let mut blocking_countries = HashSet::::new(); + + // No data today + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + for i in 1..30 { + // 9-32 connections, 0-3 negative reports, 16-20 positive reports each day + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 8 * (i % 3 + 2), + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + i % 4, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 16 + i % 5, + ); + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + // Data similar to previous days: + // 24 connections, 2 negative reports, 17 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 24, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 2, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 17, + ); + + // Should not be blocked because we have similar data. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 104 connections, 1 negative report, 85 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 104, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 1, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 85, + ); + + // This should not be blocked even though it's very different because + // it's different in the good direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 800 connections, 12 negative reports, 750 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 800, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 12, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 750, + ); + + blocking_countries.insert("ru".to_string()); + + // The censor artificially inflated bridge stats to prevent detection. + // Ensure we still detect the censorship from negative reports. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + { + let mut date = get_date(); + + // New bridge info + let mut bridge_info = BridgeInfo::new([0; 20], &String::default()); + + bridge_info + .info_by_country + .insert("ru".to_string(), BridgeCountryInfo::new(date)); + let analyzer = analysis::NormalAnalyzer::new(5, 0.25); + let confidence = 0.95; + + let mut blocking_countries = HashSet::::new(); + + // No data today + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + for i in 1..30 { + // 9-32 connections, 0-3 negative reports, 16-20 positive reports each day + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 8 * (i % 3 + 2), + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + i % 4, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 16 + i % 5, + ); + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } + + // Data similar to previous days: + // 24 connections, 2 negative reports, 17 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 24, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 2, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 17, + ); + + // Should not be blocked because we have similar data. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 104 connections, 1 negative report, 100 positive reports + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 104, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 1, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 100, + ); + + // This should not be blocked even though it's very different because + // it's different in the good direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + + // Data different from previous days: + // 24 connections, 1 negative report, 1 positive report + date += 1; + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::BridgeIps, + date, + 24, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::NegativeReports, + date, + 1, + ); + bridge_info.info_by_country.get_mut("ru").unwrap().add_info( + BridgeInfoType::PositiveReports, + date, + 1, + ); + + blocking_countries.insert("ru".to_string()); + + // This should be blocked because it's different in the bad direction. + assert_eq!( + blocked_in(&analyzer, &bridge_info, confidence, date), + blocking_countries + ); + } }