24
24
} ,
25
25
std:: {
26
26
convert:: TryFrom ,
27
- hash:: { Hash as _ , Hasher } ,
27
+ hash:: Hasher ,
28
28
iter:: repeat_with,
29
+ marker:: PhantomData ,
29
30
mem:: size_of,
30
31
sync:: atomic:: { AtomicU64 , Ordering } ,
31
32
time:: { Duration , Instant } ,
@@ -495,15 +496,16 @@ pub fn generate_offsets(
495
496
)
496
497
}
497
498
498
- pub struct Deduper < const K : usize > {
499
+ pub struct Deduper < const K : usize , T : ? Sized > {
499
500
num_bits : u64 ,
500
501
bits : Vec < AtomicU64 > ,
501
502
seeds : [ ( u128 , u128 ) ; K ] ,
502
503
clock : Instant ,
503
504
popcount : AtomicU64 , // Number of one bits in self.bits.
505
+ _phantom : PhantomData < T > ,
504
506
}
505
507
506
- impl < const K : usize > Deduper < K > {
508
+ impl < const K : usize , T : ? Sized + std :: hash :: Hash > Deduper < K , T > {
507
509
pub fn new < R : Rng > ( rng : & mut R , num_bits : u64 ) -> Self {
508
510
let size = num_bits. checked_add ( 63 ) . unwrap ( ) / 64 ;
509
511
let size = usize:: try_from ( size) . unwrap ( ) ;
@@ -513,6 +515,7 @@ impl<const K: usize> Deduper<K> {
513
515
clock : Instant :: now ( ) ,
514
516
bits : repeat_with ( AtomicU64 :: default) . take ( size) . collect ( ) ,
515
517
popcount : AtomicU64 :: default ( ) ,
518
+ _phantom : PhantomData :: < T > :: default ( ) ,
516
519
}
517
520
}
518
521
@@ -542,16 +545,14 @@ impl<const K: usize> Deduper<K> {
542
545
saturated
543
546
}
544
547
545
- // Returns true if the packet is duplicate.
548
+ // Returns true if the data is duplicate.
546
549
#[ must_use]
547
550
#[ allow( clippy:: integer_arithmetic) ]
548
- pub fn dedup_packet ( & self , packet : & Packet ) -> bool {
549
- // Should not dedup packet if already discarded.
550
- debug_assert ! ( !packet. meta. discard( ) ) ;
551
+ pub fn dedup ( & self , data : & T ) -> bool {
551
552
let mut out = true ;
552
553
for seed in self . seeds {
553
554
let mut hasher = AHasher :: new_with_keys ( seed. 0 , seed. 1 ) ;
554
- packet . data ( .. ) . unwrap_or_default ( ) . hash ( & mut hasher) ;
555
+ data. hash ( & mut hasher) ;
555
556
let hash: u64 = hasher. finish ( ) % self . num_bits ;
556
557
let index = ( hash >> 6 ) as usize ;
557
558
let mask: u64 = 1u64 << ( hash & 63 ) ;
@@ -563,28 +564,32 @@ impl<const K: usize> Deduper<K> {
563
564
}
564
565
out
565
566
}
567
+ }
566
568
567
- pub fn dedup_packets_and_count_discards (
568
- & self ,
569
- batches : & mut [ PacketBatch ] ,
570
- mut process_received_packet : impl FnMut ( & mut Packet , bool , bool ) ,
571
- ) -> u64 {
572
- batches
573
- . iter_mut ( )
574
- . flat_map ( PacketBatch :: iter_mut)
575
- . map ( |packet| {
576
- if packet. meta . discard ( ) {
577
- process_received_packet ( packet, true , false ) ;
578
- } else if self . dedup_packet ( packet) {
579
- packet. meta . set_discard ( true ) ;
580
- process_received_packet ( packet, false , true ) ;
581
- } else {
582
- process_received_packet ( packet, false , false ) ;
583
- }
584
- u64:: from ( packet. meta . discard ( ) )
585
- } )
586
- . sum ( )
587
- }
569
+ pub fn dedup_packets_and_count_discards < const K : usize > (
570
+ deduper : & Deduper < K , [ u8 ] > ,
571
+ batches : & mut [ PacketBatch ] ,
572
+ mut process_received_packet : impl FnMut ( & mut Packet , bool , bool ) ,
573
+ ) -> u64 {
574
+ batches
575
+ . iter_mut ( )
576
+ . flat_map ( PacketBatch :: iter_mut)
577
+ . map ( |packet| {
578
+ if packet. meta . discard ( ) {
579
+ process_received_packet ( packet, true , false ) ;
580
+ } else if packet
581
+ . data ( ..)
582
+ . map ( |data| deduper. dedup ( data) )
583
+ . unwrap_or ( true )
584
+ {
585
+ packet. meta . set_discard ( true ) ;
586
+ process_received_packet ( packet, false , true ) ;
587
+ } else {
588
+ process_received_packet ( packet, false , false ) ;
589
+ }
590
+ u64:: from ( packet. meta . discard ( ) )
591
+ } )
592
+ . sum ( )
588
593
}
589
594
590
595
//inplace shrink a batch of packets
@@ -1606,9 +1611,10 @@ mod tests {
1606
1611
to_packet_batches ( & std:: iter:: repeat ( tx) . take ( 1024 ) . collect :: < Vec < _ > > ( ) , 128 ) ;
1607
1612
let packet_count = sigverify:: count_packets_in_batches ( & batches) ;
1608
1613
let mut rng = rand:: thread_rng ( ) ;
1609
- let filter = Deduper :: < 2 > :: new ( & mut rng, /*num_bits:*/ 63_999_979 ) ;
1614
+ let filter = Deduper :: < 2 , [ u8 ] > :: new ( & mut rng, /*num_bits:*/ 63_999_979 ) ;
1610
1615
let mut num_deduped = 0 ;
1611
- let discard = filter. dedup_packets_and_count_discards (
1616
+ let discard = dedup_packets_and_count_discards (
1617
+ & filter,
1612
1618
& mut batches,
1613
1619
|_deduped_packet, _removed_before_sigverify_stage, _is_dup| {
1614
1620
num_deduped += 1 ;
@@ -1621,9 +1627,10 @@ mod tests {
1621
1627
#[ test]
1622
1628
fn test_dedup_diff ( ) {
1623
1629
let mut rng = rand:: thread_rng ( ) ;
1624
- let mut filter = Deduper :: < 2 > :: new ( & mut rng, /*num_bits:*/ 63_999_979 ) ;
1630
+ let mut filter = Deduper :: < 2 , [ u8 ] > :: new ( & mut rng, /*num_bits:*/ 63_999_979 ) ;
1625
1631
let mut batches = to_packet_batches ( & ( 0 ..1024 ) . map ( |_| test_tx ( ) ) . collect :: < Vec < _ > > ( ) , 128 ) ;
1626
- let discard = filter. dedup_packets_and_count_discards ( & mut batches, |_, _, _| ( ) ) as usize ;
1632
+ let discard =
1633
+ dedup_packets_and_count_discards ( & filter, & mut batches, |_, _, _| ( ) ) as usize ;
1627
1634
// because dedup uses a threadpool, there maybe up to N threads of txs that go through
1628
1635
assert_eq ! ( discard, 0 ) ;
1629
1636
assert ! ( !filter. maybe_reset(
@@ -1646,14 +1653,15 @@ mod tests {
1646
1653
const NUM_BITS : u64 = 63_999_979 ;
1647
1654
const FALSE_POSITIVE_RATE : f64 = 0.001 ;
1648
1655
let mut rng = rand:: thread_rng ( ) ;
1649
- let mut filter = Deduper :: < 2 > :: new ( & mut rng, NUM_BITS ) ;
1656
+ let mut filter = Deduper :: < 2 , [ u8 ] > :: new ( & mut rng, NUM_BITS ) ;
1650
1657
let capacity = get_capacity :: < 2 > ( NUM_BITS , FALSE_POSITIVE_RATE ) ;
1651
1658
let mut discard = 0 ;
1652
1659
assert ! ( filter. popcount. load( Ordering :: Relaxed ) < capacity) ;
1653
1660
for i in 0 ..1000 {
1654
1661
let mut batches =
1655
1662
to_packet_batches ( & ( 0 ..1000 ) . map ( |_| test_tx ( ) ) . collect :: < Vec < _ > > ( ) , 128 ) ;
1656
- discard += filter. dedup_packets_and_count_discards ( & mut batches, |_, _, _| ( ) ) as usize ;
1663
+ discard +=
1664
+ dedup_packets_and_count_discards ( & filter, & mut batches, |_, _, _| ( ) ) as usize ;
1657
1665
trace ! ( "{} {}" , i, discard) ;
1658
1666
if filter. popcount . load ( Ordering :: Relaxed ) > capacity {
1659
1667
break ;
@@ -1671,12 +1679,13 @@ mod tests {
1671
1679
#[ test]
1672
1680
fn test_dedup_false_positive ( ) {
1673
1681
let mut rng = rand:: thread_rng ( ) ;
1674
- let filter = Deduper :: < 2 > :: new ( & mut rng, /*num_bits:*/ 63_999_979 ) ;
1682
+ let filter = Deduper :: < 2 , [ u8 ] > :: new ( & mut rng, /*num_bits:*/ 63_999_979 ) ;
1675
1683
let mut discard = 0 ;
1676
1684
for i in 0 ..10 {
1677
1685
let mut batches =
1678
1686
to_packet_batches ( & ( 0 ..1024 ) . map ( |_| test_tx ( ) ) . collect :: < Vec < _ > > ( ) , 128 ) ;
1679
- discard += filter. dedup_packets_and_count_discards ( & mut batches, |_, _, _| ( ) ) as usize ;
1687
+ discard +=
1688
+ dedup_packets_and_count_discards ( & filter, & mut batches, |_, _, _| ( ) ) as usize ;
1680
1689
debug ! ( "false positive rate: {}/{}" , discard, i * 1024 ) ;
1681
1690
}
1682
1691
//allow for 1 false positive even if extremely unlikely
@@ -1697,7 +1706,7 @@ mod tests {
1697
1706
fn test_dedup_capacity ( num_bits : u64 , false_positive_rate : f64 , capacity : u64 ) {
1698
1707
let mut rng = rand:: thread_rng ( ) ;
1699
1708
assert_eq ! ( get_capacity:: <2 >( num_bits, false_positive_rate) , capacity) ;
1700
- let mut deduper = Deduper :: < 2 > :: new ( & mut rng, num_bits) ;
1709
+ let mut deduper = Deduper :: < 2 , [ u8 ] > :: new ( & mut rng, num_bits) ;
1701
1710
assert_eq ! ( deduper. false_positive_rate( ) , 0.0 ) ;
1702
1711
deduper. popcount . store ( capacity, Ordering :: Relaxed ) ;
1703
1712
assert ! ( deduper. false_positive_rate( ) < false_positive_rate) ;
@@ -1726,18 +1735,18 @@ mod tests {
1726
1735
) {
1727
1736
const FALSE_POSITIVE_RATE : f64 = 0.001 ;
1728
1737
let mut rng = ChaChaRng :: from_seed ( seed) ;
1729
- let mut deduper = Deduper :: < 2 > :: new ( & mut rng, num_bits) ;
1738
+ let mut deduper = Deduper :: < 2 , [ u8 ] > :: new ( & mut rng, num_bits) ;
1730
1739
assert_eq ! ( get_capacity:: <2 >( num_bits, FALSE_POSITIVE_RATE ) , capacity) ;
1731
1740
let mut packet = Packet :: new ( [ 0u8 ; PACKET_DATA_SIZE ] , Meta :: default ( ) ) ;
1732
1741
let mut dup_count = 0usize ;
1733
1742
for _ in 0 ..num_packets {
1734
1743
let size = rng. gen_range ( 0 , PACKET_DATA_SIZE ) ;
1735
1744
packet. meta . size = size;
1736
1745
rng. fill ( & mut packet. buffer_mut ( ) [ 0 ..size] ) ;
1737
- if deduper. dedup_packet ( & packet) {
1746
+ if deduper. dedup ( packet. data ( .. ) . unwrap ( ) ) {
1738
1747
dup_count += 1 ;
1739
1748
}
1740
- assert ! ( deduper. dedup_packet ( & packet) ) ;
1749
+ assert ! ( deduper. dedup ( packet. data ( .. ) . unwrap ( ) ) ) ;
1741
1750
}
1742
1751
assert_eq ! ( dup_count, num_dups) ;
1743
1752
assert_eq ! ( deduper. popcount. load( Ordering :: Relaxed ) , popcount) ;
0 commit comments