diff --git a/Cargo.toml b/Cargo.toml
index 14443099..4bae39e5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,8 +20,10 @@ edition = "2018"
 [dependencies]
 blake2b_simd = { version = "0.5.9", default-features = false }
 blake2s_simd = { version = "0.5.9", default-features = false }
-bytes = "0.5"
 sha1 = "0.5"
 sha2 = { version = "0.7", default-features = false }
 tiny-keccak = "1.4"
 unsigned-varint = "0.3"
+
+[dev-dependencies]
+quickcheck = "0.9.2"
diff --git a/src/lib.rs b/src/lib.rs
index 829878db..9b2fc43c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,18 +8,22 @@
 mod errors;
 mod hashes;
+mod storage;
 
 use std::convert::TryFrom;
+use std::fmt::Debug;
+use std::hash;
 
 use blake2b_simd::{blake2b, Params as Blake2bVariable};
 use blake2s_simd::{blake2s, Params as Blake2sVariable};
-use bytes::{BufMut, Bytes, BytesMut};
 use sha2::Digest;
 use tiny_keccak::Keccak;
 use unsigned_varint::{decode, encode};
 
 pub use errors::{DecodeError, DecodeOwnedError, EncodeError};
 pub use hashes::Hash;
+use std::fmt;
+use storage::Storage;
 
 // Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2
 macro_rules! encode {
@@ -104,15 +108,8 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
         let code = encode::u16(hash.code(), &mut buf);
         let mut len_buf = encode::u32_buffer();
         let size = encode::u32(input.len() as u32, &mut len_buf);
-
-        let total_len = code.len() + size.len() + input.len();
-
-        let mut output = BytesMut::with_capacity(total_len);
-        output.put_slice(code);
-        output.put_slice(size);
-        output.put_slice(input);
         Ok(Multihash {
-            bytes: output.freeze(),
+            storage: Storage::from_slices(&[&code, &size, &input]),
         })
     } else {
         let (offset, mut output) = encode_hash(hash);
@@ -135,31 +132,51 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
         });
 
         Ok(Multihash {
-            bytes: output.freeze(),
+            storage: Storage::from_slice(&output),
         })
     }
 }
 
-// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
+// Encode the given [`Hash`] value and ensure the returned [`Vec<u8>`]
 // has enough capacity to hold the actual digest.
-fn encode_hash(hash: Hash) -> (usize, BytesMut) {
+fn encode_hash(hash: Hash) -> (usize, Vec<u8>) {
     let mut buf = encode::u16_buffer();
     let code = encode::u16(hash.code(), &mut buf);
     let len = code.len() + 1 + usize::from(hash.size());
 
-    let mut output = BytesMut::with_capacity(len);
-    output.put_slice(code);
-    output.put_u8(hash.size());
+    let mut output = Vec::with_capacity(len);
+    output.extend_from_slice(code);
+    output.push(hash.size());
     output.resize(len, 0);
 
     (code.len() + 1, output)
 }
 
 /// Represents a valid multihash.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone)]
 pub struct Multihash {
-    bytes: Bytes,
+    storage: Storage,
+}
+
+impl Debug for Multihash {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Multihash").field(&self.as_bytes()).finish()
+    }
+}
+
+impl PartialEq for Multihash {
+    fn eq(&self, other: &Self) -> bool {
+        self.storage.bytes() == other.storage.bytes()
+    }
+}
+
+impl Eq for Multihash {}
+
+impl hash::Hash for Multihash {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.storage.bytes().hash(state);
+    }
 }
 
 impl Multihash {
@@ -172,7 +189,7 @@ impl Multihash {
             });
         }
         Ok(Multihash {
-            bytes: Bytes::from(bytes),
+            storage: Storage::from_slice(&bytes),
         })
     }
 
@@ -183,17 +200,19 @@ impl Multihash {
 
     /// Returns the bytes representation of the multihash.
     pub fn to_vec(&self) -> Vec<u8> {
-        Vec::from(&self.bytes[..])
+        Vec::from(self.as_bytes())
     }
 
     /// Returns the bytes representation of this multihash.
     pub fn as_bytes(&self) -> &[u8] {
-        &self.bytes
+        self.storage.bytes()
    }
 
     /// Builds a `MultihashRef` corresponding to this `Multihash`.
     pub fn as_ref(&self) -> MultihashRef {
-        MultihashRef { bytes: &self.bytes }
+        MultihashRef {
+            bytes: self.as_bytes(),
+        }
     }
 
     /// Returns which hashing algorithm is used in this multihash.
@@ -215,7 +234,7 @@ impl AsRef<[u8]> for Multihash {
 
 impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
     fn eq(&self, other: &MultihashRef<'a>) -> bool {
-        &*self.bytes == other.bytes
+        &*self.as_bytes() == other.as_bytes()
     }
 }
 
@@ -290,7 +309,7 @@ impl<'a> MultihashRef<'a> {
     /// This operation allocates.
     pub fn to_owned(&self) -> Multihash {
         Multihash {
-            bytes: Bytes::copy_from_slice(self.bytes),
+            storage: Storage::from_slice(self.bytes),
         }
     }
 
@@ -302,7 +321,7 @@ impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
     fn eq(&self, other: &Multihash) -> bool {
-        self.bytes == &*other.bytes
+        self.as_bytes() == &*other.as_bytes()
     }
 }
diff --git a/src/storage.rs b/src/storage.rs
new file mode 100644
index 00000000..835d7a07
--- /dev/null
+++ b/src/storage.rs
@@ -0,0 +1,107 @@
+use std::sync::Arc;
+
+/// MAX_INLINE is the maximum size of a multihash that can be stored inline
+///
+/// We want the currently most common multihashes using 256bit hashes to be stored inline. These
+/// hashes are 34 bytes long. An overall size of 38 seems like a good compromise. It allows storing
+/// any 256bit hash with some room to spare and gives an overall size for Storage of 40 bytes, which
+/// is a multiple of 8. We need 2 extra bytes, one for the size and one for the enum discriminator.
+const MAX_INLINE: usize = 38;
+
+#[derive(Clone)]
+pub(crate) enum Storage {
+    /// hash is stored inline if it is smaller than MAX_INLINE
+    Inline(u8, [u8; MAX_INLINE]),
+    /// hash is stored on the heap. this must be only used if the hash is actually larger than
+    /// MAX_INLINE bytes to ensure an unique representation.
+    Heap(Arc<[u8]>),
+}
+
+impl Storage {
+    /// The raw bytes.
+    pub fn bytes(&self) -> &[u8] {
+        match self {
+            Storage::Inline(len, bytes) => &bytes[..(*len as usize)],
+            Storage::Heap(data) => &data,
+        }
+    }
+
+    /// creates storage from a vec. For a size up to MAX_INLINE, this will not allocate.
+    pub fn from_slice(slice: &[u8]) -> Self {
+        let len = slice.len();
+        if len <= MAX_INLINE {
+            let mut data: [u8; MAX_INLINE] = [0; MAX_INLINE];
+            data[..len].copy_from_slice(slice);
+            Storage::Inline(len as u8, data)
+        } else {
+            Storage::Heap(slice.into())
+        }
+    }
+
+    /// creates storage from multiple slices. For a size up to MAX_INLINE, this will not allocate.
+    pub fn from_slices(slices: &[&[u8]]) -> Self {
+        let n = slices.iter().fold(0usize, |a, s| a.saturating_add(s.len()));
+        if n <= MAX_INLINE {
+            let s = slices
+                .iter()
+                .fold(([0; MAX_INLINE], 0), |(mut array, i), s| {
+                    array[i..i + s.len()].copy_from_slice(s);
+                    (array, i + s.len())
+                });
+            Storage::Inline(n as u8, s.0)
+        } else {
+            let mut v = Vec::with_capacity(n);
+            for s in slices {
+                v.extend_from_slice(s)
+            }
+            Storage::Heap(v.into())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{Storage, MAX_INLINE};
+    use quickcheck::quickcheck;
+
+    #[test]
+    fn struct_size() {
+        // this should be true for both 32 and 64 bit archs
+        assert_eq!(std::mem::size_of::<Storage>(), 40);
+    }
+
+    #[test]
+    fn roundtrip() {
+        // check that .bytes() returns whatever the storage was created with
+        for i in 0..((MAX_INLINE + 10) as u8) {
+            let data = (0..i).collect::<Vec<u8>>();
+            let storage = Storage::from_slice(&data);
+            assert_eq!(data, storage.bytes());
+        }
+    }
+
+    fn check_invariants(storage: Storage) -> bool {
+        match storage {
+            Storage::Inline(len, _) => len as usize <= MAX_INLINE,
+            Storage::Heap(arc) => arc.len() > MAX_INLINE,
+        }
+    }
+
+    quickcheck! {
+        fn roundtrip_check(data: Vec<u8>) -> bool {
+            let storage = Storage::from_slice(&data);
+            storage.bytes() == data.as_slice() && check_invariants(storage)
+        }
+
+        fn from_slices_roundtrip_check(data: Vec<Vec<u8>>) -> bool {
+            let mut slices = Vec::new();
+            let mut expected = Vec::new();
+            for v in data.iter() {
+                slices.push(v.as_slice());
+                expected.extend_from_slice(&v);
+            }
+            let storage = Storage::from_slices(&slices);
+            storage.bytes() == expected.as_slice() && check_invariants(storage)
+        }
+    }
+}
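Reviewer note, not part of the diff: the common case this change targets is a 256-bit digest. In the multihash encoding that is 1 byte of code varint + 1 byte of length + 32 bytes of digest = 34 bytes, which fits the 38-byte inline buffer, so such a `Multihash` no longer needs a heap allocation. A minimal sketch of that, assuming the crate's existing public `encode` function and `Hash::SHA2256` variant:

use multihash::{encode, Hash};

fn main() {
    // SHA2-256: code 0x12 (1 byte) + length 0x20 (1 byte) + 32-byte digest = 34 bytes,
    // small enough to land in Storage::Inline rather than on the heap.
    let mh = encode(Hash::SHA2256, b"hello world").unwrap();
    assert_eq!(mh.as_bytes().len(), 34);
}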
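Also for review, a rough sketch of the inline/heap boundary that `Storage::from_slice` enforces. The test name `inline_heap_boundary` is made up and not in the diff; it could be dropped into the existing `tests` module of src/storage.rs, which already imports `Storage` and `MAX_INLINE`:

    // Hypothetical extra test: data up to MAX_INLINE bytes stays in the fixed-size
    // Inline buffer; anything longer must use the Arc<[u8]> Heap variant.
    #[test]
    fn inline_heap_boundary() {
        let small = [0xAB_u8; MAX_INLINE];
        match Storage::from_slice(&small) {
            Storage::Inline(len, _) => assert_eq!(len as usize, MAX_INLINE),
            Storage::Heap(_) => panic!("MAX_INLINE bytes should not be heap-allocated"),
        }

        let big = [0xCD_u8; MAX_INLINE + 1];
        match Storage::from_slice(&big) {
            Storage::Inline(..) => panic!("oversized data should not be stored inline"),
            Storage::Heap(data) => assert_eq!(data.len(), MAX_INLINE + 1),
        }
    }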