Skip to content

Commit b9522c9

Browse files
v1.14: removes false_positive_rate field from Deduper (backport of #30788) (#30846)
* removes false_positive_rate field from Deduper (#30788)

  Removes the false_positive_rate field from the Deduper.

  The Deduper.false_positive_rate field is misleading because it is not enforced until maybe_reset is called. But then maybe_reset can be invoked with an explicit argument.

  (cherry picked from commit 46614c0)

  # Conflicts:
  #   perf/src/deduper.rs

* resolves mergify merge conflicts

---------

Co-authored-by: behzad nouri <behzadnouri@gmail.com>
1 parent 54bc70f commit b9522c9

File tree

4 files changed

+85
-46
lines changed

4 files changed

+85
-46
lines changed

core/src/shred_fetch_stage.rs

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,7 @@ impl ShredFetchStage {
4848
) {
4949
const STATS_SUBMIT_CADENCE: Duration = Duration::from_secs(1);
5050
let mut rng = rand::thread_rng();
51-
let mut deduper =
52-
Deduper::<2>::new(&mut rng, DEDUPER_FALSE_POSITIVE_RATE, DEDUPER_NUM_BITS);
51+
let mut deduper = Deduper::<2>::new(&mut rng, DEDUPER_NUM_BITS);
5352
let mut last_updated = Instant::now();
5453
let mut keypair = repair_context
5554
.as_ref()
@@ -64,7 +63,7 @@ impl ShredFetchStage {
6463
let mut stats = ShredFetchStats::default();
6564

6665
for mut packet_batch in recvr {
67-
deduper.maybe_reset(&mut rng, &DEDUPER_RESET_CYCLE);
66+
deduper.maybe_reset(&mut rng, DEDUPER_FALSE_POSITIVE_RATE, DEDUPER_RESET_CYCLE);
6867
if last_updated.elapsed().as_millis() as u64 > DEFAULT_MS_PER_SLOT {
6968
last_updated = Instant::now();
7069
{
@@ -300,7 +299,7 @@ mod tests {
300299
fn test_data_code_same_index() {
301300
solana_logger::setup();
302301
let mut rng = rand::thread_rng();
303-
let deduper = Deduper::<2>::new(&mut rng, DEDUPER_FALSE_POSITIVE_RATE, 640_007);
302+
let deduper = Deduper::<2>::new(&mut rng, /*num_bits:*/ 640_007);
304303
let mut packet = Packet::default();
305304
let mut stats = ShredFetchStats::default();
306305

@@ -352,7 +351,7 @@ mod tests {
352351
fn test_shred_filter() {
353352
solana_logger::setup();
354353
let mut rng = rand::thread_rng();
355-
let deduper = Deduper::<2>::new(&mut rng, DEDUPER_FALSE_POSITIVE_RATE, 640_007);
354+
let deduper = Deduper::<2>::new(&mut rng, /*num_bits:*/ 640_007);
356355
let mut packet = Packet::default();
357356
let mut stats = ShredFetchStats::default();
358357
let last_root = 0;

core/src/sigverify_stage.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -416,10 +416,9 @@ impl SigVerifyStage {
416416
.name("solSigVerifier".to_string())
417417
.spawn(move || {
418418
let mut rng = rand::thread_rng();
419-
let mut deduper =
420-
Deduper::<2>::new(&mut rng, DEDUPER_FALSE_POSITIVE_RATE, DEDUPER_NUM_BITS);
419+
let mut deduper = Deduper::<2>::new(&mut rng, DEDUPER_NUM_BITS);
421420
loop {
422-
deduper.maybe_reset(&mut rng, &MAX_DEDUPER_AGE);
421+
deduper.maybe_reset(&mut rng, DEDUPER_FALSE_POSITIVE_RATE, MAX_DEDUPER_AGE);
423422
if let Err(e) =
424423
Self::verifier(&deduper, &packet_receiver, &mut verifier, &mut stats)
425424
{

perf/benches/dedup.rs

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,12 +25,14 @@ fn test_packet_with_size(size: usize, rng: &mut ThreadRng) -> Vec<u8> {
2525
fn do_bench_dedup_packets(bencher: &mut Bencher, mut batches: Vec<PacketBatch>) {
2626
// verify packets
2727
let mut rng = rand::thread_rng();
28-
let mut deduper = Deduper::<2>::new(
29-
&mut rng, /*false_positive_rate:*/ 0.001, /*num_bits:*/ 63_999_979,
30-
);
28+
let mut deduper = Deduper::<2>::new(&mut rng, /*num_bits:*/ 63_999_979);
3129
bencher.iter(|| {
3230
let _ans = deduper.dedup_packets_and_count_discards(&mut batches, |_, _, _| ());
33-
deduper.maybe_reset(&mut rng, /*reset_cycle:*/ &Duration::from_secs(2));
31+
deduper.maybe_reset(
32+
&mut rng,
33+
0.001, // false_positive_rate
34+
Duration::from_secs(2), // reset_cycle
35+
);
3436
batches
3537
.iter_mut()
3638
.for_each(|b| b.iter_mut().for_each(|p| p.meta.set_discard(false)));
@@ -116,10 +118,12 @@ fn bench_dedup_baseline(bencher: &mut Bencher) {
116118
#[ignore]
117119
fn bench_dedup_reset(bencher: &mut Bencher) {
118120
let mut rng = rand::thread_rng();
119-
let mut deduper = Deduper::<2>::new(
120-
&mut rng, /*false_positive_rate:*/ 0.001, /*num_bits:*/ 63_999_979,
121-
);
121+
let mut deduper = Deduper::<2>::new(&mut rng, /*num_bits:*/ 63_999_979);
122122
bencher.iter(|| {
123-
deduper.maybe_reset(&mut rng, /*reset_cycle:*/ &Duration::from_millis(0));
123+
deduper.maybe_reset(
124+
&mut rng,
125+
0.001, // false_positive_rate
126+
Duration::from_millis(0), // reset_cycle
127+
);
124128
});
125129
}

perf/src/sigverify.rs

Lines changed: 67 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -500,35 +500,46 @@ pub struct Deduper<const K: usize> {
500500
bits: Vec<AtomicU64>,
501501
seeds: [(u128, u128); K],
502502
clock: Instant,
503-
// Maximum number of one bits before the false positive
504-
// rate exceeds the specified threshold.
505-
capacity: u64,
506503
popcount: AtomicU64, // Number of one bits in self.bits.
507504
}
508505

509506
impl<const K: usize> Deduper<K> {
510-
pub fn new<R: Rng>(rng: &mut R, false_positive_rate: f64, num_bits: u64) -> Self {
511-
assert!(0.0 < false_positive_rate && false_positive_rate < 1.0);
512-
let size = usize::try_from(num_bits.checked_add(63).unwrap() / 64).unwrap();
513-
let capacity = num_bits as f64 * false_positive_rate.powf(1f64 / K as f64);
507+
pub fn new<R: Rng>(rng: &mut R, num_bits: u64) -> Self {
508+
let size = num_bits.checked_add(63).unwrap() / 64;
509+
let size = usize::try_from(size).unwrap();
514510
Self {
515511
num_bits,
516512
seeds: [(); K].map(|_| rng.gen()),
517513
clock: Instant::now(),
518514
bits: repeat_with(AtomicU64::default).take(size).collect(),
519-
capacity: capacity as u64,
520515
popcount: AtomicU64::default(),
521516
}
522517
}
523518

524-
pub fn maybe_reset<R: Rng>(&mut self, rng: &mut R, reset_cycle: &Duration) {
519+
fn false_positive_rate(&self) -> f64 {
525520
let popcount = self.popcount.load(Ordering::Relaxed);
526-
if popcount >= self.capacity || &self.clock.elapsed() >= reset_cycle {
521+
let ones_ratio = popcount.min(self.num_bits) as f64 / self.num_bits as f64;
522+
ones_ratio.powi(K as i32)
523+
}
524+
525+
/// Resets the Deduper if either it is older than the reset_cycle or it is
526+
/// saturated enough that false positive rate exceeds specified threshold.
527+
/// Returns true if the deduper was saturated.
528+
pub fn maybe_reset<R: Rng>(
529+
&mut self,
530+
rng: &mut R,
531+
false_positive_rate: f64,
532+
reset_cycle: Duration,
533+
) -> bool {
534+
assert!(0.0 < false_positive_rate && false_positive_rate < 1.0);
535+
let saturated = self.false_positive_rate() >= false_positive_rate;
536+
if saturated || self.clock.elapsed() >= reset_cycle {
527537
self.seeds = [(); K].map(|_| rng.gen());
528538
self.clock = Instant::now();
529539
self.bits.fill_with(AtomicU64::default);
530540
self.popcount = AtomicU64::default();
531541
}
542+
saturated
532543
}
533544

534545
// Returns true if the packet is duplicate.
@@ -1595,9 +1606,7 @@ mod tests {
15951606
to_packet_batches(&std::iter::repeat(tx).take(1024).collect::<Vec<_>>(), 128);
15961607
let packet_count = sigverify::count_packets_in_batches(&batches);
15971608
let mut rng = rand::thread_rng();
1598-
let filter = Deduper::<2>::new(
1599-
&mut rng, /*false_positive_rate:*/ 0.001, /*num_bits:*/ 63_999_979,
1600-
);
1609+
let filter = Deduper::<2>::new(&mut rng, /*num_bits:*/ 63_999_979);
16011610
let mut num_deduped = 0;
16021611
let discard = filter.dedup_packets_and_count_discards(
16031612
&mut batches,
@@ -1612,46 +1621,57 @@ mod tests {
16121621
#[test]
16131622
fn test_dedup_diff() {
16141623
let mut rng = rand::thread_rng();
1615-
let mut filter = Deduper::<2>::new(
1616-
&mut rng, /*false_positive_rate:*/ 0.001, /*num_bits:*/ 63_999_979,
1617-
);
1624+
let mut filter = Deduper::<2>::new(&mut rng, /*num_bits:*/ 63_999_979);
16181625
let mut batches = to_packet_batches(&(0..1024).map(|_| test_tx()).collect::<Vec<_>>(), 128);
16191626
let discard = filter.dedup_packets_and_count_discards(&mut batches, |_, _, _| ()) as usize;
16201627
// because dedup uses a threadpool, there maybe up to N threads of txs that go through
16211628
assert_eq!(discard, 0);
1622-
filter.maybe_reset(&mut rng, /*reset_cycle:*/ &Duration::from_millis(0));
1629+
assert!(!filter.maybe_reset(
1630+
&mut rng,
1631+
0.001, // false_positive_rate
1632+
Duration::from_millis(0), // reset_cycle
1633+
));
16231634
for i in filter.bits {
16241635
assert_eq!(i.load(Ordering::Relaxed), 0);
16251636
}
16261637
}
16271638

1639+
fn get_capacity<const K: usize>(num_bits: u64, false_positive_rate: f64) -> u64 {
1640+
(num_bits as f64 * false_positive_rate.powf(1f64 / K as f64)) as u64
1641+
}
1642+
16281643
#[test]
16291644
#[ignore]
16301645
fn test_dedup_saturated() {
1646+
const NUM_BITS: u64 = 63_999_979;
1647+
const FALSE_POSITIVE_RATE: f64 = 0.001;
16311648
let mut rng = rand::thread_rng();
1632-
let filter = Deduper::<2>::new(
1633-
&mut rng, /*false_positive_rate:*/ 0.001, /*num_bits:*/ 63_999_979,
1634-
);
1649+
let mut filter = Deduper::<2>::new(&mut rng, NUM_BITS);
1650+
let capacity = get_capacity::<2>(NUM_BITS, FALSE_POSITIVE_RATE);
16351651
let mut discard = 0;
1636-
assert!(filter.popcount.load(Ordering::Relaxed) < filter.capacity);
1652+
assert!(filter.popcount.load(Ordering::Relaxed) < capacity);
16371653
for i in 0..1000 {
16381654
let mut batches =
16391655
to_packet_batches(&(0..1000).map(|_| test_tx()).collect::<Vec<_>>(), 128);
16401656
discard += filter.dedup_packets_and_count_discards(&mut batches, |_, _, _| ()) as usize;
16411657
trace!("{} {}", i, discard);
1642-
if filter.popcount.load(Ordering::Relaxed) >= filter.capacity {
1658+
if filter.popcount.load(Ordering::Relaxed) > capacity {
16431659
break;
16441660
}
16451661
}
1646-
assert!(filter.popcount.load(Ordering::Relaxed) >= filter.capacity);
1662+
assert!(filter.popcount.load(Ordering::Relaxed) > capacity);
1663+
assert!(filter.false_positive_rate() >= FALSE_POSITIVE_RATE);
1664+
assert!(filter.maybe_reset(
1665+
&mut rng,
1666+
FALSE_POSITIVE_RATE,
1667+
Duration::from_millis(0), // reset_cycle
1668+
));
16471669
}
16481670

16491671
#[test]
16501672
fn test_dedup_false_positive() {
16511673
let mut rng = rand::thread_rng();
1652-
let filter = Deduper::<2>::new(
1653-
&mut rng, /*false_positive_rate:*/ 0.001, /*num_bits:*/ 63_999_979,
1654-
);
1674+
let filter = Deduper::<2>::new(&mut rng, /*num_bits:*/ 63_999_979);
16551675
let mut discard = 0;
16561676
for i in 0..10 {
16571677
let mut batches =
@@ -1676,8 +1696,18 @@ mod tests {
16761696
#[test_case(637_534_199, 0.0001, 6_375_341)]
16771697
fn test_dedup_capacity(num_bits: u64, false_positive_rate: f64, capacity: u64) {
16781698
let mut rng = rand::thread_rng();
1679-
let deduper = Deduper::<2>::new(&mut rng, false_positive_rate, num_bits);
1680-
assert_eq!(deduper.capacity, capacity);
1699+
assert_eq!(get_capacity::<2>(num_bits, false_positive_rate), capacity);
1700+
let mut deduper = Deduper::<2>::new(&mut rng, num_bits);
1701+
assert_eq!(deduper.false_positive_rate(), 0.0);
1702+
deduper.popcount.store(capacity, Ordering::Relaxed);
1703+
assert!(deduper.false_positive_rate() < false_positive_rate);
1704+
deduper.popcount.store(capacity + 1, Ordering::Relaxed);
1705+
assert!(deduper.false_positive_rate() >= false_positive_rate);
1706+
assert!(deduper.maybe_reset(
1707+
&mut rng,
1708+
false_positive_rate,
1709+
Duration::from_millis(0), // reset_cycle
1710+
));
16811711
}
16821712

16831713
#[test_case([0xf9; 32], 3_199_997, 101_192, 51_414, 70, 101_125)]
@@ -1694,9 +1724,10 @@ mod tests {
16941724
num_dups: usize,
16951725
popcount: u64,
16961726
) {
1727+
const FALSE_POSITIVE_RATE: f64 = 0.001;
16971728
let mut rng = ChaChaRng::from_seed(seed);
1698-
let deduper = Deduper::<2>::new(&mut rng, /*false_positive_rate:*/ 0.001, num_bits);
1699-
assert_eq!(deduper.capacity, capacity);
1729+
let mut deduper = Deduper::<2>::new(&mut rng, num_bits);
1730+
assert_eq!(get_capacity::<2>(num_bits, FALSE_POSITIVE_RATE), capacity);
17001731
let mut packet = Packet::new([0u8; PACKET_DATA_SIZE], Meta::default());
17011732
let mut dup_count = 0usize;
17021733
for _ in 0..num_packets {
@@ -1710,6 +1741,12 @@ mod tests {
17101741
}
17111742
assert_eq!(dup_count, num_dups);
17121743
assert_eq!(deduper.popcount.load(Ordering::Relaxed), popcount);
1744+
assert!(deduper.false_positive_rate() < FALSE_POSITIVE_RATE);
1745+
assert!(!deduper.maybe_reset(
1746+
&mut rng,
1747+
FALSE_POSITIVE_RATE,
1748+
Duration::from_millis(0), // reset_cycle
1749+
));
17131750
}
17141751

17151752
#[test]

0 commit comments

Comments
 (0)