2024-02-07 18:36:40 -05:00
|
|
|
/*! Fields we need from the extra-info documents for bridges.

Note, this is NOT a complete implementation of the document format.
(https://spec.torproject.org/dir-spec/extra-info-document-format.html) */
|
|
|
|
|
|
|
|
use chrono::DateTime;
|
|
|
|
use julianday::JulianDay;
|
|
|
|
use serde::{Deserialize, Serialize};
|
2024-04-06 12:02:53 -04:00
|
|
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
2024-02-07 18:36:40 -05:00
|
|
|
|
|
|
|
/// Fields we need from extra-info document
|
|
|
|
#[derive(Eq, PartialEq, Hash, Serialize, Deserialize)]
|
|
|
|
pub struct ExtraInfo {
|
|
|
|
/// Bridge nickname, probably unused
|
|
|
|
pub nickname: String,
|
2024-02-25 17:42:30 -05:00
|
|
|
|
2024-02-07 18:36:40 -05:00
|
|
|
/// Bridge fingerprint, a SHA-1 hash of the bridge ID
|
|
|
|
pub fingerprint: [u8; 20],
|
2024-02-25 17:42:30 -05:00
|
|
|
|
2024-02-25 17:38:37 -05:00
|
|
|
/// Date (in UTC) that this document covered (bridge-stats-end if
|
|
|
|
/// available) or that the document was published (published), stored
|
|
|
|
/// as a Julian date because we don't need to know more precisely than
|
|
|
|
/// the day.
|
|
|
|
pub date: u32,
|
2024-02-25 17:42:30 -05:00
|
|
|
|
2024-02-07 18:36:40 -05:00
|
|
|
/// Map of country codes and how many users (rounded up to a multiple of
|
|
|
|
/// 8) have connected to that bridge during the day.
|
|
|
|
/// Uses BTreeMap instead of HashMap so ExtraInfo can implement Hash.
|
|
|
|
pub bridge_ips: BTreeMap<String, u32>, // TODO: What size for count?
|
|
|
|
}
|
|
|
|
|
2024-04-06 12:02:53 -04:00
|
|
|
impl ExtraInfo {
|
|
|
|
/// Converts a map of keys and values into an ExtraInfo if all necessary fields
|
|
|
|
/// are represented.
|
|
|
|
fn from_map(entry: &HashMap<String, String>) -> Result<Self, String> {
|
|
|
|
if !entry.contains_key("nickname") || !entry.contains_key("fingerprint") {
|
|
|
|
// How did we get here??
|
|
|
|
return Err("Cannot parse extra-info: Missing nickname or fingerprint".to_string());
|
2024-02-07 18:36:40 -05:00
|
|
|
}
|
2024-04-06 12:02:53 -04:00
|
|
|
if !(entry.contains_key("bridge-stats-end") || entry.contains_key("published"))
|
|
|
|
|| !entry.contains_key("bridge-ips")
|
|
|
|
{
|
|
|
|
// Some extra-infos are missing data on connecting IPs...
|
|
|
|
// But we can't do anything in that case.
|
|
|
|
return Err(format!(
|
|
|
|
"Failed to parse extra-info for {} {}",
|
|
|
|
entry.get("nickname").unwrap(),
|
|
|
|
entry.get("fingerprint").unwrap()
|
|
|
|
));
|
2024-02-07 18:36:40 -05:00
|
|
|
}
|
2024-04-06 12:02:53 -04:00
|
|
|
let nickname = entry.get("nickname").unwrap().to_string();
|
|
|
|
let fingerprint_str = entry.get("fingerprint").unwrap();
|
|
|
|
if fingerprint_str.len() != 40 {
|
|
|
|
return Err("Fingerprint must be 20 bytes".to_string());
|
2024-02-07 18:36:40 -05:00
|
|
|
}
|
2024-04-06 12:02:53 -04:00
|
|
|
let fingerprint = array_bytes::hex2array(fingerprint_str).unwrap();
|
|
|
|
let date: u32 = {
|
|
|
|
let date_str = if entry.contains_key("bridge-stats-end") {
|
|
|
|
let line = entry.get("bridge-stats-end").unwrap();
|
|
|
|
// Parse out (86400 s) from end of line
|
|
|
|
&line[..line.find("(").unwrap() - 1]
|
|
|
|
} else {
|
|
|
|
entry.get("published").unwrap().as_str()
|
|
|
|
};
|
|
|
|
JulianDay::from(
|
|
|
|
DateTime::parse_from_str(&(date_str.to_owned() + " +0000"), "%F %T %z")
|
|
|
|
.unwrap()
|
|
|
|
.date_naive(),
|
|
|
|
)
|
|
|
|
.inner()
|
|
|
|
.try_into()
|
|
|
|
.unwrap()
|
|
|
|
};
|
|
|
|
let bridge_ips_str = entry.get("bridge-ips").unwrap();
|
|
|
|
let mut bridge_ips: BTreeMap<String, u32> = BTreeMap::new();
|
|
|
|
let countries: Vec<&str> = bridge_ips_str.split(',').collect();
|
|
|
|
for country in countries {
|
|
|
|
if country != "" {
|
|
|
|
// bridge-ips may be empty
|
|
|
|
let (cc, count) = country.split_once('=').unwrap();
|
|
|
|
bridge_ips.insert(cc.to_string(), count.parse::<u32>().unwrap());
|
2024-03-25 19:37:00 -04:00
|
|
|
}
|
|
|
|
}
|
2024-02-07 18:36:40 -05:00
|
|
|
|
2024-04-06 12:02:53 -04:00
|
|
|
Ok(Self {
|
|
|
|
nickname,
|
|
|
|
fingerprint,
|
|
|
|
date,
|
|
|
|
bridge_ips,
|
|
|
|
})
|
|
|
|
}
|
2024-02-07 18:36:40 -05:00
|
|
|
|
2024-04-06 12:02:53 -04:00
|
|
|
/// Accepts a downloaded extra-infos file as a big string, returns a set of
|
|
|
|
/// the ExtraInfos represented by the file.
|
|
|
|
pub fn parse_file<'a>(extra_info_str: &str) -> HashSet<Self> {
|
|
|
|
let mut set = HashSet::<Self>::new();
|
|
|
|
let mut entry = HashMap::<String, String>::new();
|
|
|
|
for line in extra_info_str.lines() {
|
|
|
|
let line = line;
|
|
|
|
if line.starts_with("@type bridge-extra-info ") {
|
|
|
|
if !entry.is_empty() {
|
|
|
|
let extra_info = Self::from_map(&entry);
|
|
|
|
if extra_info.is_ok() {
|
|
|
|
set.insert(extra_info.unwrap());
|
|
|
|
} else {
|
|
|
|
// Just print the error and continue.
|
|
|
|
println!("{}", extra_info.err().unwrap());
|
|
|
|
}
|
|
|
|
entry = HashMap::<String, String>::new();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if line.starts_with("extra-info ") {
|
|
|
|
// extra-info line has format:
|
|
|
|
// extra-info <nickname> <fingerprint>
|
|
|
|
let line_split: Vec<&str> = line.split(' ').collect();
|
|
|
|
if line_split.len() != 3 {
|
|
|
|
println!("Misformed extra-info line");
|
|
|
|
} else {
|
|
|
|
entry.insert("nickname".to_string(), line_split[1].to_string());
|
|
|
|
entry.insert("fingerprint".to_string(), line_split[2].to_string());
|
2024-02-07 18:36:40 -05:00
|
|
|
}
|
2024-04-06 12:02:53 -04:00
|
|
|
} else {
|
|
|
|
let (key, value) = match line.split_once(' ') {
|
|
|
|
Some((k, v)) => (k, v),
|
|
|
|
None => (line, ""),
|
|
|
|
};
|
|
|
|
entry.insert(key.to_string(), value.to_string());
|
2024-02-07 18:36:40 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-04-06 12:02:53 -04:00
|
|
|
// Do for the last one
|
|
|
|
let extra_info = Self::from_map(&entry);
|
|
|
|
if extra_info.is_ok() {
|
|
|
|
set.insert(extra_info.unwrap());
|
|
|
|
} else {
|
|
|
|
println!("{}", extra_info.err().unwrap());
|
|
|
|
}
|
|
|
|
set
|
2024-02-07 18:36:40 -05:00
|
|
|
}
|
|
|
|
}
|