Use bridge-stats-end if available, refactor db to counts of reports

This commit is contained in:
Vecna 2024-02-25 17:38:37 -05:00
parent cc86baa4b5
commit cde22fef67
2 changed files with 93 additions and 72 deletions

View File

@ -26,9 +26,11 @@ pub struct ExtraInfo {
pub nickname: String,
/// Bridge fingerprint, a SHA-1 hash of the bridge ID
pub fingerprint: [u8; 20],
/// Date (in UTC) that this document was published, stored as a Julian
/// date because we don't need to know more precisely than the day.
pub published: u32,
/// Date (in UTC) that this document covered (bridge-stats-end if
/// available) or that the document was published (published), stored
/// as a Julian date because we don't need to know more precisely than
/// the day.
pub date: u32,
/// Map of country codes and how many users (rounded up to a multiple of
/// 8) have connected to that bridge during the day.
/// Uses BTreeMap instead of HashMap so ExtraInfo can implement Hash.
@ -40,7 +42,7 @@ fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo,
// How did we get here??
return Err("Cannot parse extra-info: Missing nickname or fingerprint".to_string());
}
if !entry.contains_key("published") || !entry.contains_key("bridge-ips") {
if !(entry.contains_key("bridge-stats-end") || entry.contains_key("published")) || !entry.contains_key("bridge-ips") {
// Some extra-infos are missing data on connecting IPs...
// But we can't do anything in that case.
return Err(format!(
@ -55,17 +57,26 @@ fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo,
return Err("Fingerprint must be 20 bytes".to_string());
}
let fingerprint = array_bytes::hex2array(fingerprint_str).unwrap();
let published: u32 = JulianDay::from(
DateTime::parse_from_str(
&(entry.get("published").unwrap().to_owned() + " +0000"),
"%F %T %z",
let date: u32 = {
let date_str = if entry.contains_key("bridge-stats-end") {
let line = entry.get("bridge-stats-end").unwrap();
// Parse out (86400 s) from end of line
&line[..line.find("(").unwrap()-1]
} else {
entry.get("published").unwrap().as_str()
};
JulianDay::from(
DateTime::parse_from_str(
&(date_str.to_owned() + " +0000"),
"%F %T %z",
)
.unwrap()
.date_naive(),
)
.inner()
.try_into()
.unwrap()
.date_naive(),
)
.inner()
.try_into()
.unwrap();
};
let bridge_ips_str = entry.get("bridge-ips").unwrap();
let mut bridge_ips: BTreeMap<String, u32> = BTreeMap::new();
let countries: Vec<&str> = bridge_ips_str.split(',').collect();
@ -80,7 +91,7 @@ fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo,
Ok(ExtraInfo {
nickname,
fingerprint,
published,
date,
bridge_ips,
})
}

View File

@ -74,10 +74,10 @@ pub struct BridgeInfo {
}
impl BridgeInfo {
pub fn new(fingerprint: [u8; 20], nickname: String) -> Self {
pub fn new(fingerprint: [u8; 20], nickname: &String) -> Self {
Self {
fingerprint: fingerprint,
nickname: nickname,
nickname: nickname.to_string(),
first_seen: get_date(),
is_blocked: false,
info_by_day: HashMap::<u32, DailyBridgeInfo>::new(),
@ -106,49 +106,79 @@ impl fmt::Display for BridgeInfo {
}
}
// TODO: Should this be an enum to make it easier to implement different
// versions for plugins?
/// Kind of per-country count stored for a bridge on a given day.
///
/// Used as the key of the inner map in `DailyBridgeInfo::info_by_country`,
/// i.e. country code -> `BridgeInfoType` -> count.
///
/// NOTE(review): the variant order determines the derived `Ord` (and thus
/// the iteration order of `BTreeMap`s keyed by this type) and presumably the
/// serialized representation as well, so reordering variants is likely not
/// backward-compatible with already-stored data — confirm before changing.
#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub enum BridgeInfoType {
/// Connection count taken from an extra-info document's `bridge-ips`
/// data. When several values arrive for the same day and country,
/// `DailyBridgeInfo::add_info` keeps the largest one.
BridgeIps,
/// Count of negative reports; `DailyBridgeInfo::add_info` sums these.
NegativeReports,
/// Count of positive reports; `DailyBridgeInfo::add_info` sums these.
PositiveReports,
}
/// Information about bridge reachability, gathered daily
#[derive(Serialize, Deserialize)]
pub struct DailyBridgeInfo {
/// Map of country codes and how many users (rounded up to a multiple of
/// 8) have connected to that bridge during the day.
pub bridge_ips: BTreeMap<String, u32>,
/// Map of negative reports to count of negative reports received
pub negative_reports: BTreeMap<SerializableNegativeReport, u32>,
/// Set of positive reports received during this day
pub positive_reports: Vec<SerializablePositiveReport>,
// We don't care about ordering of the reports, but I'm using vectors for
// reports because we don't want a set to deduplicate our reports, and
// I don't want to implement Hash or Ord. Another possibility might be a
// map of the report to the number of that exact report we received.
// Positive reports include a Lox proof and should be unique, but negative
// reports could be deduplicated.
pub info_by_country: BTreeMap<String, BTreeMap<BridgeInfoType, u32>>,
}
impl DailyBridgeInfo {
pub fn new() -> Self {
Self {
bridge_ips: BTreeMap::<String, u32>::new(),
negative_reports: BTreeMap::<SerializableNegativeReport, u32>::new(),
positive_reports: Vec::<SerializablePositiveReport>::new(),
info_by_country: BTreeMap::<String, BTreeMap::<BridgeInfoType, u32>>::new(),
}
}
/// Merges per-country counts of one kind of information into this day's
/// record.
///
/// For every `(country, count)` pair in `count_per_country`:
/// - if no value of `info_type` is stored for that country yet, `count` is
///   stored as-is (creating the country's entry if necessary);
/// - if `info_type` is `BridgeInfoType::BridgeIps`, the greater of the
///   stored value and `count` is kept;
/// - for any other `info_type`, `count` is added to the stored value.
///
/// An empty `count_per_country` leaves `self` unchanged.
pub fn add_info(&mut self, info_type: BridgeInfoType, count_per_country: &BTreeMap::<String, u32>) {
    for (country, &count) in count_per_country {
        // One lookup per level: fetch (or create) the country's map, then
        // update (or create) the slot for this kind of information.
        self.info_by_country
            .entry(country.clone())
            .or_default()
            .entry(info_type)
            .and_modify(|stored| {
                if info_type == BridgeInfoType::BridgeIps {
                    // Keep the greatest value we've seen today rather
                    // than accumulating repeated connection counts.
                    *stored = (*stored).max(count);
                } else {
                    // Report counts accumulate over the day.
                    *stored += count;
                }
            })
            .or_insert(count);
    }
}
}
impl fmt::Display for DailyBridgeInfo {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut str = String::from("bridge_ips:");
for country in self.bridge_ips.keys() {
str.push_str(
format!(
"\n cc: {}, connections: {}",
country,
self.bridge_ips.get(country).unwrap()
)
.as_str(),
);
let mut str = String::from("info:");
for country in self.info_by_country.keys() {
let info = self.info_by_country.get(country).unwrap();
let ip_count = match info.get(&BridgeInfoType::BridgeIps) {
Some(v) => v,
None => &0,
};
let nr_count = match info.get(&BridgeInfoType::NegativeReports) {
Some(v) => v,
None => &0,
};
let pr_count = match info.get(&BridgeInfoType::PositiveReports) {
Some(v) => v,
None => &0,
};
if ip_count > &0 || nr_count > &0 || pr_count > &0 {
str.push_str(
format!(
"\n cc: {}\n connections: {}\n negative reports: {}\n positive reports: {}",
country,
ip_count,
nr_count,
pr_count,
)
.as_str(),
);
}
}
write!(f, "{}", str)
}
@ -162,45 +192,25 @@ pub fn add_extra_info_to_db(db: &Db, extra_info: ExtraInfo) {
let fingerprint = extra_info.fingerprint;
let mut bridge_info = match db.get(&fingerprint).unwrap() {
Some(v) => bincode::deserialize(&v).unwrap(),
None => BridgeInfo::new(fingerprint, extra_info.nickname),
None => BridgeInfo::new(fingerprint, &extra_info.nickname),
};
// If we already have an entry, compare it with the new one. For each
// country:count mapping, use the greater of the two counts.
if bridge_info.info_by_day.contains_key(&extra_info.published) {
if bridge_info.info_by_day.contains_key(&extra_info.date) {
let daily_bridge_info = bridge_info
.info_by_day
.get_mut(&extra_info.published)
.get_mut(&extra_info.date)
.unwrap();
if extra_info.bridge_ips != daily_bridge_info.bridge_ips {
for country in extra_info.bridge_ips.keys() {
if daily_bridge_info.bridge_ips.contains_key(country) {
// Use greatest value we've seen today
if daily_bridge_info.bridge_ips.get(country).unwrap()
< extra_info.bridge_ips.get(country).unwrap()
{
daily_bridge_info.bridge_ips.insert(
country.to_string(),
*extra_info.bridge_ips.get(country).unwrap(),
);
}
} else {
daily_bridge_info.bridge_ips.insert(
country.to_string(),
*extra_info.bridge_ips.get(country).unwrap(),
);
}
}
}
daily_bridge_info.add_info(BridgeInfoType::BridgeIps, &extra_info.bridge_ips);
} else {
// No existing entry; make a new one.
let daily_bridge_info = DailyBridgeInfo {
bridge_ips: extra_info.bridge_ips,
negative_reports: BTreeMap::<SerializableNegativeReport, u32>::new(),
positive_reports: Vec::<SerializablePositiveReport>::new(),
let mut daily_bridge_info = DailyBridgeInfo {
info_by_country: BTreeMap::<String, BTreeMap::<BridgeInfoType, u32>>::new(),
};
daily_bridge_info.add_info(BridgeInfoType::BridgeIps, &extra_info.bridge_ips);
bridge_info
.info_by_day
.insert(extra_info.published, daily_bridge_info);
.insert(extra_info.date, daily_bridge_info);
}
// Commit changes to database
db.insert(fingerprint, bincode::serialize(&bridge_info).unwrap())