Skip to content

Commit 20c5c89

Browse files
committed
Use inline storage for small hashes
1 parent d1214d5 commit 20c5c89

File tree

3 files changed

+116
-22
lines changed

3 files changed

+116
-22
lines changed

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ edition = "2018"
2020
[dependencies]
2121
blake2b_simd = { version = "0.5.9", default-features = false }
2222
blake2s_simd = { version = "0.5.9", default-features = false }
23-
bytes = "0.5"
2423
sha1 = "0.5"
2524
sha2 = { version = "0.7", default-features = false }
2625
tiny-keccak = "1.4"

src/lib.rs

Lines changed: 73 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,22 @@
88
99
mod errors;
1010
mod hashes;
11+
mod storage;
1112

1213
use std::convert::TryFrom;
14+
use std::fmt::Debug;
15+
use std::hash;
1316

1417
use blake2b_simd::{blake2b, Params as Blake2bVariable};
1518
use blake2s_simd::{blake2s, Params as Blake2sVariable};
16-
use bytes::{BufMut, Bytes, BytesMut};
1719
use sha2::Digest;
1820
use tiny_keccak::Keccak;
1921
use unsigned_varint::{decode, encode};
2022

2123
pub use errors::{DecodeError, DecodeOwnedError, EncodeError};
2224
pub use hashes::Hash;
25+
use std::fmt;
26+
use storage::Storage;
2327

2428
// Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2
2529
macro_rules! encode {
@@ -107,12 +111,12 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
107111

108112
let total_len = code.len() + size.len() + input.len();
109113

110-
let mut output = BytesMut::with_capacity(total_len);
111-
output.put_slice(code);
112-
output.put_slice(size);
113-
output.put_slice(input);
114+
let mut output = Vec::with_capacity(total_len);
115+
output.extend_from_slice(code);
116+
output.extend_from_slice(size);
117+
output.extend_from_slice(input);
114118
Ok(Multihash {
115-
bytes: output.freeze(),
119+
storage: Storage::copy_from_slice(&output),
116120
})
117121
} else {
118122
let (offset, mut output) = encode_hash(hash);
@@ -135,31 +139,51 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
135139
});
136140

137141
Ok(Multihash {
138-
bytes: output.freeze(),
142+
storage: Storage::copy_from_slice(&output),
139143
})
140144
}
141145
}
142146

143-
// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
147+
// Encode the given [`Hash`] value and ensure the returned [`Vec<u8>`]
144148
// has enough capacity to hold the actual digest.
145-
fn encode_hash(hash: Hash) -> (usize, BytesMut) {
149+
fn encode_hash(hash: Hash) -> (usize, Vec<u8>) {
146150
let mut buf = encode::u16_buffer();
147151
let code = encode::u16(hash.code(), &mut buf);
148152

149153
let len = code.len() + 1 + usize::from(hash.size());
150154

151-
let mut output = BytesMut::with_capacity(len);
152-
output.put_slice(code);
153-
output.put_u8(hash.size());
155+
let mut output = Vec::with_capacity(len);
156+
output.extend_from_slice(code);
157+
output.push(hash.size());
154158
output.resize(len, 0);
155159

156160
(code.len() + 1, output)
157161
}
158162

159163
/// Represents a valid multihash.
160-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
164+
#[derive(Clone)]
161165
pub struct Multihash {
162-
bytes: Bytes,
166+
storage: Storage,
167+
}
168+
169+
impl Debug for Multihash {
170+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
171+
write!(f, "Multihash")
172+
}
173+
}
174+
175+
impl PartialEq for Multihash {
176+
fn eq(&self, other: &Self) -> bool {
177+
self.storage.bytes() == other.storage.bytes()
178+
}
179+
}
180+
181+
impl Eq for Multihash {}
182+
183+
impl hash::Hash for Multihash {
184+
fn hash<H: hash::Hasher>(&self, state: &mut H) {
185+
self.storage.bytes().hash(state);
186+
}
163187
}
164188

165189
impl Multihash {
@@ -172,7 +196,7 @@ impl Multihash {
172196
});
173197
}
174198
Ok(Multihash {
175-
bytes: Bytes::from(bytes),
199+
storage: Storage::copy_from_slice(&bytes),
176200
})
177201
}
178202

@@ -183,17 +207,21 @@ impl Multihash {
183207

184208
/// Returns the bytes representation of the multihash.
185209
pub fn to_vec(&self) -> Vec<u8> {
186-
Vec::from(&self.bytes[..])
210+
Vec::from(self.as_bytes())
187211
}
188212

189213
/// Returns the bytes representation of this multihash.
190214
pub fn as_bytes(&self) -> &[u8] {
191-
&self.bytes
215+
let bytes = self.storage.bytes();
216+
let size = multihash_size(bytes).expect("storage contains a valid multihash");
217+
&bytes[..size]
192218
}
193219

194220
/// Builds a `MultihashRef` corresponding to this `Multihash`.
195221
pub fn as_ref(&self) -> MultihashRef {
196-
MultihashRef { bytes: &self.bytes }
222+
MultihashRef {
223+
bytes: self.as_bytes(),
224+
}
197225
}
198226

199227
/// Returns which hashing algorithm is used in this multihash.
@@ -215,7 +243,7 @@ impl AsRef<[u8]> for Multihash {
215243

216244
impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
217245
fn eq(&self, other: &MultihashRef<'a>) -> bool {
218-
&*self.bytes == other.bytes
246+
&*self.as_bytes() == other.as_bytes()
219247
}
220248
}
221249

@@ -233,6 +261,30 @@ pub struct MultihashRef<'a> {
233261
bytes: &'a [u8],
234262
}
235263

264+
/// Given a buffer starting with a valid multihash, returns the size of the multihash in bytes.
265+
fn multihash_size(input: &[u8]) -> Result<usize, DecodeError> {
266+
if input.is_empty() {
267+
return Err(DecodeError::BadInputLength);
268+
}
269+
let mut res = 0usize;
270+
271+
// Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
272+
std::convert::identity::<fn(Hash) -> u16>(Hash::code);
273+
let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;
274+
275+
// Very convoluted way to get the size of the code
276+
let mut tmp = [0u8; 3];
277+
res += unsigned_varint::encode::u16(code, &mut tmp).len();
278+
279+
let (hash_len, _) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?;
280+
281+
// Very convoluted way to get the size of the hash_len
282+
let mut tmp = [0u8; 5];
283+
res += unsigned_varint::encode::u32(hash_len, &mut tmp).len();
284+
res += hash_len as usize;
285+
Ok(res)
286+
}
287+
236288
impl<'a> MultihashRef<'a> {
237289
/// Creates a `MultihashRef` from the given `input`.
238290
pub fn from_slice(input: &'a [u8]) -> Result<Self, DecodeError> {
@@ -290,7 +342,7 @@ impl<'a> MultihashRef<'a> {
290342
/// This operation allocates.
291343
pub fn to_owned(&self) -> Multihash {
292344
Multihash {
293-
bytes: Bytes::copy_from_slice(self.bytes),
345+
storage: Storage::copy_from_slice(self.bytes),
294346
}
295347
}
296348

@@ -302,7 +354,7 @@ impl<'a> MultihashRef<'a> {
302354

303355
impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
304356
fn eq(&self, other: &Multihash) -> bool {
305-
self.bytes == &*other.bytes
357+
self.as_bytes() == &*other.as_bytes()
306358
}
307359
}
308360

src/storage.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use std::sync::Arc;
2+
3+
const MAX_INLINE: usize = 39;
4+
5+
#[derive(Clone)]
6+
pub enum Storage {
7+
/// Hash is stored inline. If it is shorter than 39 bytes, it is padded with trailing 0u8.
8+
Inline([u8; MAX_INLINE]),
9+
/// Hash is stored on the heap. This must only be used if the hash is actually larger than
10+
/// 39 bytes to ensure a unique representation.
11+
Heap(Arc<[u8]>),
12+
}
13+
14+
impl Storage {
15+
/// The raw bytes. Note that this can be longer than the data this storage has been created from.
16+
pub fn bytes(&self) -> &[u8] {
17+
match self {
18+
Storage::Inline(bytes) => bytes,
19+
Storage::Heap(data) => &data,
20+
}
21+
}
22+
23+
/// Creates storage from a slice. Note that this will not preserve the size: inline storage is zero-padded to 39 bytes.
24+
pub fn copy_from_slice(slice: &[u8]) -> Self {
25+
if slice.len() <= MAX_INLINE {
26+
let mut data: [u8; MAX_INLINE] = [0; MAX_INLINE];
27+
&data[..slice.len()].copy_from_slice(slice);
28+
Storage::Inline(data)
29+
} else {
30+
Storage::Heap(slice.into())
31+
}
32+
}
33+
}
34+
35+
#[cfg(test)]
36+
mod tests {
37+
use super::Storage;
38+
39+
#[test]
40+
fn test_size() {
41+
assert_eq!(std::mem::size_of::<Storage>(), 40);
42+
}
43+
}

0 commit comments

Comments
 (0)