troll-patrol/src/extra_info.rs

173 lines
6.6 KiB
Rust
Raw Normal View History

2024-02-07 18:36:40 -05:00
/*! Fields we need from the extra-info documents for bridges...
Note, this is NOT a complete implementation of the document format.
(https://spec.torproject.org/dir-spec/extra-info-document-format.html) */
use chrono::DateTime;
use julianday::JulianDay;
use serde::{Deserialize, Serialize};
2024-04-27 17:28:33 -04:00
use std::{
collections::{BTreeMap, HashMap, HashSet},
fmt,
};
2024-02-07 18:36:40 -05:00
/// Fields we need from extra-info document
2024-04-27 17:28:33 -04:00
#[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
2024-02-07 18:36:40 -05:00
pub struct ExtraInfo {
/// Bridge nickname, probably unused
pub nickname: String,
2024-02-07 18:36:40 -05:00
/// Bridge fingerprint, a SHA-1 hash of the bridge ID
pub fingerprint: [u8; 20],
/// Date (in UTC) that this document covered (bridge-stats-end if
/// available) or that the document was published (published), stored
/// as a Julian date because we don't need to know more precisely than
/// the day.
pub date: u32,
2024-02-07 18:36:40 -05:00
/// Map of country codes and how many users (rounded up to a multiple of
/// 8) have connected to that bridge during the day.
/// Uses BTreeMap instead of HashMap so ExtraInfo can implement Hash.
2024-04-26 15:25:56 -04:00
pub bridge_ips: BTreeMap<String, u32>,
2024-02-07 18:36:40 -05:00
}
impl ExtraInfo {
/// Converts a map of keys and values into an ExtraInfo if all necessary fields
/// are represented.
fn from_map(entry: &HashMap<String, String>) -> Result<Self, String> {
if !entry.contains_key("nickname") || !entry.contains_key("fingerprint") {
// How did we get here??
return Err("Cannot parse extra-info: Missing nickname or fingerprint".to_string());
2024-02-07 18:36:40 -05:00
}
if !(entry.contains_key("bridge-stats-end") || entry.contains_key("published"))
|| !entry.contains_key("bridge-ips")
{
// Some extra-infos are missing data on connecting IPs...
// But we can't do anything in that case.
return Err(format!(
"Failed to parse extra-info for {} {}",
entry.get("nickname").unwrap(),
entry.get("fingerprint").unwrap()
));
2024-02-07 18:36:40 -05:00
}
let nickname = entry.get("nickname").unwrap().to_string();
let fingerprint_str = entry.get("fingerprint").unwrap();
if fingerprint_str.len() != 40 {
return Err("Fingerprint must be 20 bytes".to_string());
2024-02-07 18:36:40 -05:00
}
let fingerprint = array_bytes::hex2array(fingerprint_str).unwrap();
let date: u32 = {
let date_str = if entry.contains_key("bridge-stats-end") {
let line = entry.get("bridge-stats-end").unwrap();
// Parse out (86400 s) from end of line
&line[..line.find('(').unwrap() - 1]
} else {
entry.get("published").unwrap().as_str()
};
JulianDay::from(
DateTime::parse_from_str(&(date_str.to_owned() + " +0000"), "%F %T %z")
.unwrap()
.date_naive(),
)
.inner()
.try_into()
.unwrap()
};
let bridge_ips_str = entry.get("bridge-ips").unwrap();
let mut bridge_ips: BTreeMap<String, u32> = BTreeMap::new();
let countries: Vec<&str> = bridge_ips_str.split(',').collect();
for country in countries {
if !country.is_empty() {
// bridge-ips may be empty
let (cc, count) = country.split_once('=').unwrap();
bridge_ips.insert(cc.to_string(), count.parse::<u32>().unwrap());
}
}
2024-02-07 18:36:40 -05:00
Ok(Self {
nickname,
fingerprint,
date,
bridge_ips,
})
}
2024-02-07 18:36:40 -05:00
/// Accepts a downloaded extra-infos file as a big string, returns a set of
/// the ExtraInfos represented by the file.
pub fn parse_file(extra_info_str: &str) -> HashSet<Self> {
let mut set = HashSet::<Self>::new();
let mut entry = HashMap::<String, String>::new();
for line in extra_info_str.lines() {
if line.starts_with("@type bridge-extra-info ") {
if !entry.is_empty() {
let extra_info = Self::from_map(&entry);
if let Ok(ei) = extra_info {
set.insert(ei);
} else {
// Just print the error and continue.
println!("{}", extra_info.err().unwrap());
}
entry = HashMap::<String, String>::new();
}
} else if line.starts_with("extra-info ") {
// extra-info line has format:
// extra-info <nickname> <fingerprint>
let line_split: Vec<&str> = line.split(' ').collect();
if line_split.len() != 3 {
println!("Misformed extra-info line");
} else {
entry.insert("nickname".to_string(), line_split[1].to_string());
entry.insert("fingerprint".to_string(), line_split[2].to_string());
2024-02-07 18:36:40 -05:00
}
} else {
let (key, value) = match line.split_once(' ') {
Some((k, v)) => (k, v),
None => (line, ""),
};
entry.insert(key.to_string(), value.to_string());
2024-02-07 18:36:40 -05:00
}
}
// Do for the last one
let extra_info = Self::from_map(&entry);
if let Ok(ei) = extra_info {
set.insert(ei);
} else {
println!("{}", extra_info.err().unwrap());
}
set
2024-02-07 18:36:40 -05:00
}
}
2024-04-27 17:28:33 -04:00
/// Renders the ExtraInfo as a text record shaped like one entry of a
/// downloaded extra-infos file.
impl fmt::Display for ExtraInfo {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let fingerprint_hex = array_bytes::bytes2hex("", self.fingerprint).to_uppercase();
        // Panics if the stored day number does not fit the Julian-day type.
        let day = JulianDay::new(self.date.try_into().unwrap()).to_date();

        // The real documents list countries in descending order by count;
        // we emit them in map (country-code) order, which is enough for
        // our purposes.
        let per_country: Vec<String> = self
            .bridge_ips
            .iter()
            .map(|(cc, count)| format!("{}={}", cc, count))
            .collect();

        let mut record = String::from("@type bridge-extra-info 1.3");
        record += &format!("\nextra-info {} {}", self.nickname, fingerprint_hex);
        // Only the day is stored, so the time of day is a fixed placeholder.
        record += &format!("\nbridge-stats-end {} 23:59:59 (86400 s)", day);
        record += &format!("\npublished {} 23:59:59", day);
        record += "\nbridge-ips ";
        record += &per_country.join(",");
        record += "\n";

        f.write_str(&record)
    }
}