Skip to content

Commit 70d7623

Browse files
authored
Merge pull request #517 from Chia-Network/optimize-compression
Optimize `ReadCacheLookup`, 6.7 speed-up
2 parents 68d87a2 + 32bed63 commit 70d7623

File tree

6 files changed

+141
-35
lines changed

6 files changed

+141
-35
lines changed

Cargo.lock

Lines changed: 41 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ serde = "1.0.214"
6161
serde_json = "1.0.133"
6262
clap = "4.5.20"
6363
rand_chacha = "0.3.1"
64+
bitvec = "1.0.1"
6465

6566
[dependencies]
6667
lazy_static = { workspace = true }
@@ -70,11 +71,13 @@ num-integer = { workspace = true }
7071
chia-bls = { workspace = true }
7172
chia-sha2 = { workspace = true }
7273
hex-literal = { workspace = true }
74+
bitvec = { workspace = true }
7375
# for secp sigs
7476
k256 = { version = "0.13.4", features = ["ecdsa"] }
7577
p256 = { version = "0.13.2", features = ["ecdsa"] }
7678
# for keccak256
7779
sha3 = "0.10.8"
80+
rand = { workspace = true }
7881

7982
[dev-dependencies]
8083
rstest = { workspace = true }

benches/serialize.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ fn serialize_benchmark(c: &mut Criterion) {
1616
let block2: &[u8] = include_bytes!("2.generator");
1717

1818
let mut group = c.benchmark_group("serialize");
19-
group.sample_size(10);
2019

2120
for (block, name) in [(&block0, "0"), (&block1, "1"), (&block2, "2")] {
2221
let mut a = Allocator::new();

src/serde/identity_hash.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use rand::Rng;
2+
use std::hash::{BuildHasher, Hasher};
3+
4+
#[derive(Default, Clone, Copy)]
5+
pub struct IdentityHash(u64, u64);
6+
7+
impl IdentityHash {
8+
fn new(salt: u64) -> Self {
9+
Self(0, salt)
10+
}
11+
}
12+
13+
impl Hasher for IdentityHash {
14+
fn finish(&self) -> u64 {
15+
self.0
16+
}
17+
18+
fn write(&mut self, bytes: &[u8]) {
19+
self.0 =
20+
u64::from_le_bytes(bytes[0..8].try_into().expect("expected 32 byte hashes")) ^ self.1;
21+
}
22+
23+
fn write_u64(&mut self, _i: u64) {
24+
panic!("This hasher only takes bytes");
25+
}
26+
}
27+
28+
pub struct RandomState(u64);
29+
30+
impl Default for RandomState {
31+
fn default() -> Self {
32+
let mut rng = rand::thread_rng();
33+
Self(rng.gen())
34+
}
35+
}
36+
37+
impl BuildHasher for RandomState {
38+
type Hasher = IdentityHash;
39+
40+
fn build_hasher(&self) -> Self::Hasher {
41+
IdentityHash::new(self.0)
42+
}
43+
}

src/serde/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ mod de;
33
mod de_br;
44
mod de_tree;
55
mod errors;
6+
mod identity_hash;
67
mod incremental;
78
mod object_cache;
89
mod parse_atom;
@@ -20,6 +21,7 @@ mod test;
2021
pub use de::node_from_bytes;
2122
pub use de_br::{node_from_bytes_backrefs, node_from_bytes_backrefs_record};
2223
pub use de_tree::{parse_triples, ParsedTriple};
24+
pub use identity_hash::RandomState;
2325
pub use incremental::{Serializer, UndoState};
2426
pub use object_cache::{serialized_length, treehash, ObjectCache};
2527
pub use ser::{node_to_bytes, node_to_bytes_limit};

src/serde/read_cache_lookup.rs

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
use crate::serde::RandomState;
2+
use bitvec::prelude::*;
3+
use bitvec::vec::BitVec;
14
/// When deserializing a clvm object, a stack of deserialized child objects
25
/// is created, which can be used with back-references. A `ReadCacheLookup` keeps
36
/// track of the state of this stack and all child objects under each root
@@ -28,10 +31,10 @@ pub struct ReadCacheLookup {
2831
/// the tree hashes of the contents on the left and right
2932
read_stack: Vec<(Bytes32, Bytes32)>,
3033

31-
count: HashMap<Bytes32, u32>,
34+
count: HashMap<Bytes32, u32, RandomState>,
3235

3336
/// a mapping of tree hashes to `(parent, is_right)` tuples
34-
parent_lookup: HashMap<Bytes32, Vec<(Bytes32, u8)>>,
37+
parent_lookup: HashMap<Bytes32, Vec<(Bytes32, bool)>, RandomState>,
3538
}
3639

3740
impl Default for ReadCacheLookup {
@@ -43,10 +46,12 @@ impl Default for ReadCacheLookup {
4346
impl ReadCacheLookup {
4447
pub fn new() -> Self {
4548
let root_hash = hash_blob(&[1]);
46-
let read_stack = vec![];
47-
let mut count = HashMap::default();
49+
let read_stack = Vec::with_capacity(1000);
50+
// all keys in count and parent_lookup are tree-hashes. There's no need
51+
// to hash them again for the hash map
52+
let mut count = HashMap::with_hasher(RandomState::default());
4853
count.insert(root_hash, 1);
49-
let parent_lookup = HashMap::default();
54+
let parent_lookup = HashMap::with_hasher(RandomState::default());
5055
Self {
5156
root_hash,
5257
read_stack,
@@ -67,13 +72,13 @@ impl ReadCacheLookup {
6772
*self.count.entry(id).or_insert(0) += 1;
6873
*self.count.entry(new_root_hash).or_insert(0) += 1;
6974

70-
let new_parent_to_old_root = (new_root_hash, 0);
75+
let new_parent_to_old_root = (new_root_hash, false);
7176
self.parent_lookup
7277
.entry(id)
7378
.or_default()
7479
.push(new_parent_to_old_root);
7580

76-
let new_parent_to_id = (new_root_hash, 1);
81+
let new_parent_to_id = (new_root_hash, true);
7782
self.parent_lookup
7883
.entry(self.root_hash)
7984
.or_default()
@@ -108,31 +113,41 @@ impl ReadCacheLookup {
108113
self.parent_lookup
109114
.entry(left.0)
110115
.or_default()
111-
.push((new_root_hash, 0));
116+
.push((new_root_hash, false));
112117

113118
self.parent_lookup
114119
.entry(right.0)
115120
.or_default()
116-
.push((new_root_hash, 1));
121+
.push((new_root_hash, true));
117122

118123
self.push(new_root_hash);
119124
}
120125

121126
/// return the list of minimal-length paths to the given hash which will serialize to no larger
122127
/// than the given size (or an empty list if no such path exists)
123128
pub fn find_paths(&self, id: &Bytes32, serialized_length: u64) -> Vec<Vec<u8>> {
124-
let mut seen_ids = HashSet::<&Bytes32>::default();
125-
let mut possible_responses = vec![];
126-
if serialized_length < 3 {
127-
return possible_responses;
129+
// this function is not cheap. only keep going if there's potential to
130+
// save enough bytes
131+
if serialized_length < 4 {
132+
return vec![];
128133
}
129-
assert!(serialized_length > 2);
134+
135+
let mut possible_responses = Vec::with_capacity(50);
136+
137+
// all the values we put in this hash set are themselves sha256 hashes.
138+
// There's no point in hashing the hashes
139+
let mut seen_ids = HashSet::<&Bytes32, RandomState>::with_capacity_and_hasher(
140+
1000,
141+
RandomState::default(),
142+
);
143+
130144
let max_bytes_for_path_encoding = serialized_length - 2; // 1 byte for 0xfe, 1 min byte for savings
131145
let max_path_length: usize = (max_bytes_for_path_encoding.saturating_mul(8) - 1)
132146
.try_into()
133147
.unwrap_or(usize::MAX);
134148
seen_ids.insert(id);
135-
let mut partial_paths = vec![(*id, vec![])];
149+
let mut partial_paths = Vec::with_capacity(500);
150+
partial_paths.push((*id, BitVec::with_capacity(100)));
136151

137152
while !partial_paths.is_empty() {
138153
let mut new_partial_paths = vec![];
@@ -147,11 +162,11 @@ impl ReadCacheLookup {
147162
for (parent, direction) in items.iter() {
148163
if *(self.count.get(parent).unwrap_or(&0)) > 0 && !seen_ids.contains(parent)
149164
{
150-
let mut new_path = path.clone();
151-
new_path.push(*direction);
152-
if new_path.len() > max_path_length {
165+
if path.len() + 1 > max_path_length {
153166
return possible_responses;
154167
}
168+
let mut new_path = path.clone();
169+
new_path.push(*direction);
155170
new_partial_paths.push((*parent, new_path));
156171
}
157172
seen_ids.insert(parent);
@@ -185,13 +200,13 @@ impl ReadCacheLookup {
185200
/// If `A` => `v` then `[A] + [0]` => `v * 2` and `[A] + [1]` => `v * 2 + 1`
186201
/// Then the integer is turned into the minimal-length array of `u8` representing
187202
/// that value as an unsigned integer.
188-
fn reversed_path_to_vec_u8(path: &[u8]) -> Vec<u8> {
203+
fn reversed_path_to_vec_u8(path: &BitSlice) -> Vec<u8> {
189204
let byte_count = (path.len() + 1 + 7) >> 3;
190205
let mut v = vec![0; byte_count];
191206
let mut index = byte_count - 1;
192207
let mut mask: u8 = 1;
193208
for p in path.iter().rev() {
194-
if *p != 0 {
209+
if p != false {
195210
v[index] |= mask;
196211
}
197212
mask = {
@@ -213,30 +228,33 @@ mod tests {
213228

214229
#[test]
215230
fn test_path_to_vec_u8() {
216-
assert_eq!(reversed_path_to_vec_u8(&[]), vec!(0b1));
217-
assert_eq!(reversed_path_to_vec_u8(&[0]), vec!(0b10));
218-
assert_eq!(reversed_path_to_vec_u8(&[1]), vec!(0b11));
219-
assert_eq!(reversed_path_to_vec_u8(&[0, 0]), vec!(0b100));
220-
assert_eq!(reversed_path_to_vec_u8(&[0, 1]), vec!(0b101));
221-
assert_eq!(reversed_path_to_vec_u8(&[1, 0]), vec!(0b110));
222-
assert_eq!(reversed_path_to_vec_u8(&[1, 1]), vec!(0b111));
223-
assert_eq!(reversed_path_to_vec_u8(&[1, 1, 1]), vec!(0b1111));
224-
assert_eq!(reversed_path_to_vec_u8(&[0, 1, 1, 1]), vec!(0b10111));
225-
assert_eq!(reversed_path_to_vec_u8(&[1, 0, 1, 1, 1]), vec!(0b110111));
231+
assert_eq!(reversed_path_to_vec_u8(bits![]), vec!(0b1));
232+
assert_eq!(reversed_path_to_vec_u8(bits![0]), vec!(0b10));
233+
assert_eq!(reversed_path_to_vec_u8(bits![1]), vec!(0b11));
234+
assert_eq!(reversed_path_to_vec_u8(bits![0, 0]), vec!(0b100));
235+
assert_eq!(reversed_path_to_vec_u8(bits![0, 1]), vec!(0b101));
236+
assert_eq!(reversed_path_to_vec_u8(bits![1, 0]), vec!(0b110));
237+
assert_eq!(reversed_path_to_vec_u8(bits![1, 1]), vec!(0b111));
238+
assert_eq!(reversed_path_to_vec_u8(bits![1, 1, 1]), vec!(0b1111));
239+
assert_eq!(reversed_path_to_vec_u8(bits![0, 1, 1, 1]), vec!(0b10111));
240+
assert_eq!(
241+
reversed_path_to_vec_u8(bits![1, 0, 1, 1, 1]),
242+
vec!(0b110111)
243+
);
226244
assert_eq!(
227-
reversed_path_to_vec_u8(&[1, 1, 0, 1, 1, 1]),
245+
reversed_path_to_vec_u8(bits![1, 1, 0, 1, 1, 1]),
228246
vec!(0b1110111)
229247
);
230248
assert_eq!(
231-
reversed_path_to_vec_u8(&[0, 1, 1, 0, 1, 1, 1]),
249+
reversed_path_to_vec_u8(bits![0, 1, 1, 0, 1, 1, 1]),
232250
vec!(0b10110111)
233251
);
234252
assert_eq!(
235-
reversed_path_to_vec_u8(&[0, 0, 1, 1, 0, 1, 1, 1]),
253+
reversed_path_to_vec_u8(bits![0, 0, 1, 1, 0, 1, 1, 1]),
236254
vec!(0b1, 0b00110111)
237255
);
238256
assert_eq!(
239-
reversed_path_to_vec_u8(&[1, 0, 0, 1, 1, 0, 1, 1, 1]),
257+
reversed_path_to_vec_u8(bits![1, 0, 0, 1, 1, 0, 1, 1, 1]),
240258
vec!(0b11, 0b00110111)
241259
);
242260
}

0 commit comments

Comments
 (0)