Fix bug in DupFilter that breaks serialize/deserialize

This commit is contained in:
onyinyang 2023-09-07 11:12:40 -04:00
parent 2d9febaf14
commit 794f80ab11
No known key found for this signature in database
GPG Key ID: 156A6435430C2036
2 changed files with 29 additions and 7 deletions

File diff suppressed because one or more lines are too long

View File

@ -4,8 +4,7 @@
This implementation just keeps the table of seen ids in memory, but a
production one would of course use a disk-backed database. */
use std::cmp::Eq;
use std::collections::HashMap;
use std::collections::HashSet;
use std::hash::Hash;
use serde::{Deserialize, Serialize};
@ -14,7 +13,7 @@ use serde::{Deserialize, Serialize};
/// seen ids. IdType will typically be Scalar.
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct DupFilter<IdType: Hash + Eq + Copy + Serialize> {
seen_table: HashMap<IdType, ()>,
seen_table: HashSet<IdType>,
}
/// A return type indicating whether the item was fresh (not previously
@ -30,7 +29,7 @@ impl<IdType: Hash + Eq + Copy + Serialize> DupFilter<IdType> {
/// to the seen table. Return Seen if it is already in the table,
/// Fresh if not.
pub fn check(&self, id: &IdType) -> SeenType {
if self.seen_table.contains_key(id) {
if self.seen_table.contains(id) {
SeenType::Seen
} else {
SeenType::Fresh
@ -41,9 +40,9 @@ impl<IdType: Hash + Eq + Copy + Serialize> DupFilter<IdType> {
/// table, and add it if not. Return Fresh if it was not already
/// in the table, and Seen if it was.
pub fn filter(&mut self, id: &IdType) -> SeenType {
match self.seen_table.insert(*id, ()) {
None => SeenType::Fresh,
Some(()) => SeenType::Seen,
match self.seen_table.insert(*id) {
true => SeenType::Fresh,
false => SeenType::Seen,
}
}
}