From 4cfe7d201ed4d7ff17df311ca157fbdc8d6dc115 Mon Sep 17 00:00:00 2001 From: Volker Mische Date: Tue, 11 Feb 2020 14:52:50 +0100 Subject: [PATCH 1/4] chore: remove blank lines from Cargo.toml --- Cargo.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4bae39e5..174f9048 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,19 +2,12 @@ name = "multihash" description = "Implementation of the multihash format" repository = "https://github.com/multiformats/rust-multihash" - keywords = ["multihash", "ipfs"] - version = "0.9.4" - authors = ["dignifiedquire "] - license = "MIT" - readme = "README.md" - documentation = "https://docs.rs/multihash/" - edition = "2018" [dependencies] From 7bba92bdf502c93ef6514cf622ee7b2a8c452a9c Mon Sep 17 00:00:00 2001 From: Volker Mische Date: Fri, 21 Feb 2020 15:37:23 +0100 Subject: [PATCH 2/4] refactor: move contents from lib.rs to digests.rs --- src/digests.rs | 329 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 331 +------------------------------------------------ 2 files changed, 331 insertions(+), 329 deletions(-) create mode 100644 src/digests.rs diff --git a/src/digests.rs b/src/digests.rs new file mode 100644 index 00000000..f6d8ef92 --- /dev/null +++ b/src/digests.rs @@ -0,0 +1,329 @@ +use std::convert::TryFrom; +use std::{cmp, fmt, hash}; + +use blake2b_simd::{blake2b, Params as Blake2bVariable}; +use blake2s_simd::{blake2s, Params as Blake2sVariable}; +use sha2::Digest; +use tiny_keccak::Keccak; +use unsigned_varint::{decode, encode}; + +use crate::errors::{DecodeError, DecodeOwnedError, EncodeError}; +use crate::hashes::Hash; +use crate::storage::Storage; + +// Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2 +macro_rules! 
encode { + (sha1, Sha1, $input:expr, $output:expr) => {{ + let mut hasher = sha1::Sha1::new(); + hasher.update($input); + $output.copy_from_slice(&hasher.digest().bytes()); + }}; + (sha2, $algorithm:ident, $input:expr, $output:expr) => {{ + let mut hasher = sha2::$algorithm::default(); + hasher.input($input); + $output.copy_from_slice(hasher.result().as_ref()); + }}; + (tiny, $constructor:ident, $input:expr, $output:expr) => {{ + let mut kec = Keccak::$constructor(); + kec.update($input); + kec.finalize($output); + }}; + (blake2, $algorithm:ident, $input:expr, $output:expr) => {{ + let hash = $algorithm($input); + $output.copy_from_slice(hash.as_ref()); + }}; + (blake2_256, $constructor:ident, $input:expr, $output:expr) => {{ + let hash = $constructor::new() + .hash_length(32) + .to_state() + .update($input) + .finalize(); + $output.copy_from_slice(hash.as_ref()); + }}; + (blake2_128, $constructor:ident, $input:expr, $output:expr) => {{ + let hash = $constructor::new() + .hash_length(16) + .to_state() + .update($input) + .finalize(); + $output.copy_from_slice(hash.as_ref()); + }}; +} + +// And another one to keep the matching DRY +macro_rules! match_encoder { + ($hash:ident for ($input:expr, $output:expr) { + $( $hashtype:ident => $lib:ident :: $method:ident, )* + }) => ({ + match $hash { + $( + Hash::$hashtype => encode!($lib, $method, $input, $output), + )* + + _ => return Err(EncodeError::UnsupportedType) + } + }) +} + +/// Encodes data into a multihash. +/// +/// # Errors +/// +/// Will return an error if the specified hash type is not supported. See the docs for `Hash` +/// to see what is supported. 
+/// +/// # Examples +/// +/// ``` +/// use multihash::{encode, Hash}; +/// +/// assert_eq!( +/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(), +/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196, +/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233] +/// ); +/// ``` +/// +pub fn encode(hash: Hash, input: &[u8]) -> Result { + // Custom length encoding for the identity multihash + if let Hash::Identity = hash { + if u64::from(std::u32::MAX) < as_u64(input.len()) { + return Err(EncodeError::UnsupportedInputLength); + } + let mut buf = encode::u16_buffer(); + let code = encode::u16(hash.code(), &mut buf); + let mut len_buf = encode::u32_buffer(); + let size = encode::u32(input.len() as u32, &mut len_buf); + Ok(Multihash { + storage: Storage::from_slices(&[&code, &size, &input]), + }) + } else { + let (offset, mut output) = encode_hash(hash); + match_encoder!(hash for (input, &mut output[offset ..]) { + SHA1 => sha1::Sha1, + SHA2256 => sha2::Sha256, + SHA2512 => sha2::Sha512, + SHA3224 => tiny::new_sha3_224, + SHA3256 => tiny::new_sha3_256, + SHA3384 => tiny::new_sha3_384, + SHA3512 => tiny::new_sha3_512, + Keccak224 => tiny::new_keccak224, + Keccak256 => tiny::new_keccak256, + Keccak384 => tiny::new_keccak384, + Keccak512 => tiny::new_keccak512, + Blake2b512 => blake2::blake2b, + Blake2b256 => blake2_256::Blake2bVariable, + Blake2s256 => blake2::blake2s, + Blake2s128 => blake2_128::Blake2sVariable, + }); + + Ok(Multihash { + storage: Storage::from_slice(&output), + }) + } +} + +// Encode the given [`Hash`] value and ensure the returned [`Vec`] +// has enough capacity to hold the actual digest. 
+fn encode_hash(hash: Hash) -> (usize, Vec) { + let mut buf = encode::u16_buffer(); + let code = encode::u16(hash.code(), &mut buf); + + let len = code.len() + 1 + usize::from(hash.size()); + + let mut output = Vec::with_capacity(len); + output.extend_from_slice(code); + output.push(hash.size()); + output.resize(len, 0); + + (code.len() + 1, output) +} + +/// Represents a valid multihash. +#[derive(Clone)] +pub struct Multihash { + storage: Storage, +} + +impl fmt::Debug for Multihash { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Multihash").field(&self.as_bytes()).finish() + } +} + +impl PartialEq for Multihash { + fn eq(&self, other: &Self) -> bool { + self.storage.bytes() == other.storage.bytes() + } +} + +impl Eq for Multihash {} + +impl hash::Hash for Multihash { + fn hash(&self, state: &mut H) { + self.storage.bytes().hash(state); + } +} + +impl Multihash { + /// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`. + pub fn from_bytes(bytes: Vec) -> Result { + if let Err(err) = MultihashRef::from_slice(&bytes) { + return Err(DecodeOwnedError { + error: err, + data: bytes, + }); + } + Ok(Multihash { + storage: Storage::from_slice(&bytes), + }) + } + + /// Returns the bytes representation of the multihash. + pub fn into_bytes(self) -> Vec { + self.to_vec() + } + + /// Returns the bytes representation of the multihash. + pub fn to_vec(&self) -> Vec { + Vec::from(self.as_bytes()) + } + + /// Returns the bytes representation of this multihash. + pub fn as_bytes(&self) -> &[u8] { + self.storage.bytes() + } + + /// Builds a `MultihashRef` corresponding to this `Multihash`. + pub fn as_ref(&self) -> MultihashRef { + MultihashRef { + bytes: self.as_bytes(), + } + } + + /// Returns which hashing algorithm is used in this multihash. + pub fn algorithm(&self) -> Hash { + self.as_ref().algorithm() + } + + /// Returns the hashed data. 
+ pub fn digest(&self) -> &[u8] { + self.as_ref().digest() + } +} + +impl AsRef<[u8]> for Multihash { + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl<'a> PartialEq> for Multihash { + fn eq(&self, other: &MultihashRef<'a>) -> bool { + &*self.as_bytes() == other.as_bytes() + } +} + +impl TryFrom> for Multihash { + type Error = DecodeOwnedError; + + fn try_from(value: Vec) -> Result { + Multihash::from_bytes(value) + } +} + +impl PartialOrd for Multihash { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Multihash { + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.as_ref().cmp(&other.as_ref()) + } +} + +/// Represents a valid multihash. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MultihashRef<'a> { + bytes: &'a [u8], +} + +impl<'a> MultihashRef<'a> { + /// Creates a `MultihashRef` from the given `input`. + pub fn from_slice(input: &'a [u8]) -> Result { + if input.is_empty() { + return Err(DecodeError::BadInputLength); + } + + // Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct. 
+ std::convert::identity:: u16>(Hash::code); + let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?; + + let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?; + + // handle the identity case + if alg == Hash::Identity { + let (hash_len, bytes) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?; + if as_u64(bytes.len()) != u64::from(hash_len) { + return Err(DecodeError::BadInputLength); + } + return Ok(MultihashRef { bytes: input }); + } + + let hash_len = usize::from(alg.size()); + + // Length of input after hash code should be exactly hash_len + 1 + if bytes.len() != hash_len + 1 { + return Err(DecodeError::BadInputLength); + } + + if usize::from(bytes[0]) != hash_len { + return Err(DecodeError::BadInputLength); + } + + Ok(MultihashRef { bytes: input }) + } + + /// Returns which hashing algorithm is used in this multihash. + pub fn algorithm(&self) -> Hash { + let code = decode::u16(&self.bytes) + .expect("multihash is known to be valid algorithm") + .0; + Hash::from_code(code).expect("multihash is known to be valid") + } + + /// Returns the hashed data. + pub fn digest(&self) -> &'a [u8] { + let bytes = decode::u16(&self.bytes) + .expect("multihash is known to be valid digest") + .1; + &bytes[1..] + } + + /// Builds a `Multihash` that owns the data. + /// + /// This operation allocates. + pub fn to_owned(&self) -> Multihash { + Multihash { + storage: Storage::from_slice(self.bytes), + } + } + + /// Returns the bytes representation of this multihash. + pub fn as_bytes(&self) -> &'a [u8] { + &self.bytes + } +} + +impl<'a> PartialEq for MultihashRef<'a> { + fn eq(&self, other: &Multihash) -> bool { + self.as_bytes() == &*other.as_bytes() + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +fn as_u64(a: usize) -> u64 { + a as u64 +} diff --git a/src/lib.rs b/src/lib.rs index 2a6e7289..f9d29b4d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,338 +6,11 @@ //! 
A `MultihashRef` is the same as a `Multihash`, except that it doesn't own its data. //! +mod digests; mod errors; mod hashes; mod storage; -use std::convert::TryFrom; -use std::fmt::Debug; -use std::hash; - -use blake2b_simd::{blake2b, Params as Blake2bVariable}; -use blake2s_simd::{blake2s, Params as Blake2sVariable}; -use sha2::Digest; -use tiny_keccak::Keccak; -use unsigned_varint::{decode, encode}; - +pub use digests::{encode, Multihash, MultihashRef}; pub use errors::{DecodeError, DecodeOwnedError, EncodeError}; pub use hashes::Hash; -use std::{cmp, fmt}; -use storage::Storage; - -// Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2 -macro_rules! encode { - (sha1, Sha1, $input:expr, $output:expr) => {{ - let mut hasher = sha1::Sha1::new(); - hasher.update($input); - $output.copy_from_slice(&hasher.digest().bytes()); - }}; - (sha2, $algorithm:ident, $input:expr, $output:expr) => {{ - let mut hasher = sha2::$algorithm::default(); - hasher.input($input); - $output.copy_from_slice(hasher.result().as_ref()); - }}; - (tiny, $constructor:ident, $input:expr, $output:expr) => {{ - let mut kec = Keccak::$constructor(); - kec.update($input); - kec.finalize($output); - }}; - (blake2, $algorithm:ident, $input:expr, $output:expr) => {{ - let hash = $algorithm($input); - $output.copy_from_slice(hash.as_ref()); - }}; - (blake2_256, $constructor:ident, $input:expr, $output:expr) => {{ - let hash = $constructor::new() - .hash_length(32) - .to_state() - .update($input) - .finalize(); - $output.copy_from_slice(hash.as_ref()); - }}; - (blake2_128, $constructor:ident, $input:expr, $output:expr) => {{ - let hash = $constructor::new() - .hash_length(16) - .to_state() - .update($input) - .finalize(); - $output.copy_from_slice(hash.as_ref()); - }}; -} - -// And another one to keep the matching DRY -macro_rules! 
match_encoder { - ($hash:ident for ($input:expr, $output:expr) { - $( $hashtype:ident => $lib:ident :: $method:ident, )* - }) => ({ - match $hash { - $( - Hash::$hashtype => encode!($lib, $method, $input, $output), - )* - - _ => return Err(EncodeError::UnsupportedType) - } - }) -} - -/// Encodes data into a multihash. -/// -/// # Errors -/// -/// Will return an error if the specified hash type is not supported. See the docs for `Hash` -/// to see what is supported. -/// -/// # Examples -/// -/// ``` -/// use multihash::{encode, Hash}; -/// -/// assert_eq!( -/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(), -/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196, -/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233] -/// ); -/// ``` -/// -pub fn encode(hash: Hash, input: &[u8]) -> Result { - // Custom length encoding for the identity multihash - if let Hash::Identity = hash { - if u64::from(std::u32::MAX) < as_u64(input.len()) { - return Err(EncodeError::UnsupportedInputLength); - } - let mut buf = encode::u16_buffer(); - let code = encode::u16(hash.code(), &mut buf); - let mut len_buf = encode::u32_buffer(); - let size = encode::u32(input.len() as u32, &mut len_buf); - Ok(Multihash { - storage: Storage::from_slices(&[&code, &size, &input]), - }) - } else { - let (offset, mut output) = encode_hash(hash); - match_encoder!(hash for (input, &mut output[offset ..]) { - SHA1 => sha1::Sha1, - SHA2256 => sha2::Sha256, - SHA2512 => sha2::Sha512, - SHA3224 => tiny::new_sha3_224, - SHA3256 => tiny::new_sha3_256, - SHA3384 => tiny::new_sha3_384, - SHA3512 => tiny::new_sha3_512, - Keccak224 => tiny::new_keccak224, - Keccak256 => tiny::new_keccak256, - Keccak384 => tiny::new_keccak384, - Keccak512 => tiny::new_keccak512, - Blake2b512 => blake2::blake2b, - Blake2b256 => blake2_256::Blake2bVariable, - Blake2s256 => blake2::blake2s, - Blake2s128 => blake2_128::Blake2sVariable, - }); - - Ok(Multihash { - 
storage: Storage::from_slice(&output), - }) - } -} - -// Encode the given [`Hash`] value and ensure the returned [`Vec`] -// has enough capacity to hold the actual digest. -fn encode_hash(hash: Hash) -> (usize, Vec) { - let mut buf = encode::u16_buffer(); - let code = encode::u16(hash.code(), &mut buf); - - let len = code.len() + 1 + usize::from(hash.size()); - - let mut output = Vec::with_capacity(len); - output.extend_from_slice(code); - output.push(hash.size()); - output.resize(len, 0); - - (code.len() + 1, output) -} - -/// Represents a valid multihash. -#[derive(Clone)] -pub struct Multihash { - storage: Storage, -} - -impl Debug for Multihash { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Multihash").field(&self.as_bytes()).finish() - } -} - -impl PartialEq for Multihash { - fn eq(&self, other: &Self) -> bool { - self.storage.bytes() == other.storage.bytes() - } -} - -impl Eq for Multihash {} - -impl hash::Hash for Multihash { - fn hash(&self, state: &mut H) { - self.storage.bytes().hash(state); - } -} - -impl Multihash { - /// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`. - pub fn from_bytes(bytes: Vec) -> Result { - if let Err(err) = MultihashRef::from_slice(&bytes) { - return Err(DecodeOwnedError { - error: err, - data: bytes, - }); - } - Ok(Multihash { - storage: Storage::from_slice(&bytes), - }) - } - - /// Returns the bytes representation of the multihash. - pub fn into_bytes(self) -> Vec { - self.to_vec() - } - - /// Returns the bytes representation of the multihash. - pub fn to_vec(&self) -> Vec { - Vec::from(self.as_bytes()) - } - - /// Returns the bytes representation of this multihash. - pub fn as_bytes(&self) -> &[u8] { - self.storage.bytes() - } - - /// Builds a `MultihashRef` corresponding to this `Multihash`. - pub fn as_ref(&self) -> MultihashRef { - MultihashRef { - bytes: self.as_bytes(), - } - } - - /// Returns which hashing algorithm is used in this multihash. 
- pub fn algorithm(&self) -> Hash { - self.as_ref().algorithm() - } - - /// Returns the hashed data. - pub fn digest(&self) -> &[u8] { - self.as_ref().digest() - } -} - -impl AsRef<[u8]> for Multihash { - fn as_ref(&self) -> &[u8] { - self.as_bytes() - } -} - -impl<'a> PartialEq> for Multihash { - fn eq(&self, other: &MultihashRef<'a>) -> bool { - &*self.as_bytes() == other.as_bytes() - } -} - -impl TryFrom> for Multihash { - type Error = DecodeOwnedError; - - fn try_from(value: Vec) -> Result { - Multihash::from_bytes(value) - } -} - -impl PartialOrd for Multihash { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Multihash { - fn cmp(&self, other: &Self) -> cmp::Ordering { - self.as_ref().cmp(&other.as_ref()) - } -} - -/// Represents a valid multihash. -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct MultihashRef<'a> { - bytes: &'a [u8], -} - -impl<'a> MultihashRef<'a> { - /// Creates a `MultihashRef` from the given `input`. - pub fn from_slice(input: &'a [u8]) -> Result { - if input.is_empty() { - return Err(DecodeError::BadInputLength); - } - - // Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct. 
- std::convert::identity:: u16>(Hash::code); - let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?; - - let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?; - - // handle the identity case - if alg == Hash::Identity { - let (hash_len, bytes) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?; - if as_u64(bytes.len()) != u64::from(hash_len) { - return Err(DecodeError::BadInputLength); - } - return Ok(MultihashRef { bytes: input }); - } - - let hash_len = usize::from(alg.size()); - - // Length of input after hash code should be exactly hash_len + 1 - if bytes.len() != hash_len + 1 { - return Err(DecodeError::BadInputLength); - } - - if usize::from(bytes[0]) != hash_len { - return Err(DecodeError::BadInputLength); - } - - Ok(MultihashRef { bytes: input }) - } - - /// Returns which hashing algorithm is used in this multihash. - pub fn algorithm(&self) -> Hash { - let code = decode::u16(&self.bytes) - .expect("multihash is known to be valid algorithm") - .0; - Hash::from_code(code).expect("multihash is known to be valid") - } - - /// Returns the hashed data. - pub fn digest(&self) -> &'a [u8] { - let bytes = decode::u16(&self.bytes) - .expect("multihash is known to be valid digest") - .1; - &bytes[1..] - } - - /// Builds a `Multihash` that owns the data. - /// - /// This operation allocates. - pub fn to_owned(&self) -> Multihash { - Multihash { - storage: Storage::from_slice(self.bytes), - } - } - - /// Returns the bytes representation of this multihash. 
- pub fn as_bytes(&self) -> &'a [u8] { - &self.bytes - } -} - -impl<'a> PartialEq for MultihashRef<'a> { - fn eq(&self, other: &Multihash) -> bool { - self.as_bytes() == &*other.as_bytes() - } -} - -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -fn as_u64(a: usize) -> u64 { - a as u64 -} From a1c4cd5f53d34a5ea1729996ecbc3645c427a07f Mon Sep 17 00:00:00 2001 From: Volker Mische Date: Thu, 13 Feb 2020 18:13:10 +0100 Subject: [PATCH 3/4] feat: Massive refactor with a new API The new API also makes it possible to wrap an existing hash into a Multihash. This is useful if you e.g. want to create hashes for testing, without spending time with actually hashing data. You also interact with multicodecs less directly. This should make it easier to use your own multihash implementations without forking the code base. BREAKING CHANGE: There is a new API When using multihashes, you now import implementations of that hash, which has a `digest()` function. That function returns a Multihash. 
New way: use multihash::Sha3_512; let my_multihash = Sha3_512::digest(b"hello world!"); Old way: use multihash::{encode, Hash}; let my_multihash = encode(Hash::SHA3512, b"hello world!"); --- Cargo.toml | 7 +- src/digests.rs | 216 +++++---------------- src/hashes.rs | 497 ++++++++++++++++++++++++++++++++++++++++-------- src/lib.rs | 4 +- tests/hashes.rs | 40 ++++ tests/lib.rs | 142 +++++++------- 6 files changed, 580 insertions(+), 326 deletions(-) create mode 100644 tests/hashes.rs diff --git a/Cargo.toml b/Cargo.toml index 174f9048..837b2bae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,11 @@ edition = "2018" [dependencies] blake2b_simd = { version = "0.5.9", default-features = false } blake2s_simd = { version = "0.5.9", default-features = false } -sha1 = "0.5" -sha2 = { version = "0.7", default-features = false } -tiny-keccak = "1.4" +sha1 = "0.6" +sha2 = { version = "0.8", default-features = false } +tiny-keccak = { version = "2.0.0", features = ["keccak", "sha3"] } unsigned-varint = "0.3" +digest = { version = "0.8", default-features = false } [dev-dependencies] quickcheck = "0.9.2" diff --git a/src/digests.rs b/src/digests.rs index f6d8ef92..d6f00d04 100644 --- a/src/digests.rs +++ b/src/digests.rs @@ -1,143 +1,11 @@ use std::convert::TryFrom; use std::{cmp, fmt, hash}; -use blake2b_simd::{blake2b, Params as Blake2bVariable}; -use blake2s_simd::{blake2s, Params as Blake2sVariable}; -use sha2::Digest; -use tiny_keccak::Keccak; -use unsigned_varint::{decode, encode}; - -use crate::errors::{DecodeError, DecodeOwnedError, EncodeError}; -use crate::hashes::Hash; -use crate::storage::Storage; - -// Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2 -macro_rules! 
encode { - (sha1, Sha1, $input:expr, $output:expr) => {{ - let mut hasher = sha1::Sha1::new(); - hasher.update($input); - $output.copy_from_slice(&hasher.digest().bytes()); - }}; - (sha2, $algorithm:ident, $input:expr, $output:expr) => {{ - let mut hasher = sha2::$algorithm::default(); - hasher.input($input); - $output.copy_from_slice(hasher.result().as_ref()); - }}; - (tiny, $constructor:ident, $input:expr, $output:expr) => {{ - let mut kec = Keccak::$constructor(); - kec.update($input); - kec.finalize($output); - }}; - (blake2, $algorithm:ident, $input:expr, $output:expr) => {{ - let hash = $algorithm($input); - $output.copy_from_slice(hash.as_ref()); - }}; - (blake2_256, $constructor:ident, $input:expr, $output:expr) => {{ - let hash = $constructor::new() - .hash_length(32) - .to_state() - .update($input) - .finalize(); - $output.copy_from_slice(hash.as_ref()); - }}; - (blake2_128, $constructor:ident, $input:expr, $output:expr) => {{ - let hash = $constructor::new() - .hash_length(16) - .to_state() - .update($input) - .finalize(); - $output.copy_from_slice(hash.as_ref()); - }}; -} - -// And another one to keep the matching DRY -macro_rules! match_encoder { - ($hash:ident for ($input:expr, $output:expr) { - $( $hashtype:ident => $lib:ident :: $method:ident, )* - }) => ({ - match $hash { - $( - Hash::$hashtype => encode!($lib, $method, $input, $output), - )* - - _ => return Err(EncodeError::UnsupportedType) - } - }) -} - -/// Encodes data into a multihash. -/// -/// # Errors -/// -/// Will return an error if the specified hash type is not supported. See the docs for `Hash` -/// to see what is supported. 
-/// -/// # Examples -/// -/// ``` -/// use multihash::{encode, Hash}; -/// -/// assert_eq!( -/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(), -/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196, -/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233] -/// ); -/// ``` -/// -pub fn encode(hash: Hash, input: &[u8]) -> Result { - // Custom length encoding for the identity multihash - if let Hash::Identity = hash { - if u64::from(std::u32::MAX) < as_u64(input.len()) { - return Err(EncodeError::UnsupportedInputLength); - } - let mut buf = encode::u16_buffer(); - let code = encode::u16(hash.code(), &mut buf); - let mut len_buf = encode::u32_buffer(); - let size = encode::u32(input.len() as u32, &mut len_buf); - Ok(Multihash { - storage: Storage::from_slices(&[&code, &size, &input]), - }) - } else { - let (offset, mut output) = encode_hash(hash); - match_encoder!(hash for (input, &mut output[offset ..]) { - SHA1 => sha1::Sha1, - SHA2256 => sha2::Sha256, - SHA2512 => sha2::Sha512, - SHA3224 => tiny::new_sha3_224, - SHA3256 => tiny::new_sha3_256, - SHA3384 => tiny::new_sha3_384, - SHA3512 => tiny::new_sha3_512, - Keccak224 => tiny::new_keccak224, - Keccak256 => tiny::new_keccak256, - Keccak384 => tiny::new_keccak384, - Keccak512 => tiny::new_keccak512, - Blake2b512 => blake2::blake2b, - Blake2b256 => blake2_256::Blake2bVariable, - Blake2s256 => blake2::blake2s, - Blake2s128 => blake2_128::Blake2sVariable, - }); - - Ok(Multihash { - storage: Storage::from_slice(&output), - }) - } -} +use unsigned_varint::{decode as varint_decode, encode as varint_encode}; -// Encode the given [`Hash`] value and ensure the returned [`Vec`] -// has enough capacity to hold the actual digest. 
-fn encode_hash(hash: Hash) -> (usize, Vec) { - let mut buf = encode::u16_buffer(); - let code = encode::u16(hash.code(), &mut buf); - - let len = code.len() + 1 + usize::from(hash.size()); - - let mut output = Vec::with_capacity(len); - output.extend_from_slice(code); - output.push(hash.size()); - output.resize(len, 0); - - (code.len() + 1, output) -} +use crate::errors::{DecodeError, DecodeOwnedError}; +use crate::hashes::Code; +use crate::storage::Storage; /// Represents a valid multihash. #[derive(Clone)] @@ -202,7 +70,7 @@ impl Multihash { } /// Returns which hashing algorithm is used in this multihash. - pub fn algorithm(&self) -> Hash { + pub fn algorithm(&self) -> Code { self.as_ref().algorithm() } @@ -257,29 +125,11 @@ impl<'a> MultihashRef<'a> { return Err(DecodeError::BadInputLength); } - // Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct. - std::convert::identity:: u16>(Hash::code); - let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?; - - let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?; + let (_code, bytes) = varint_decode::u64(&input).map_err(|_| DecodeError::BadInputLength)?; - // handle the identity case - if alg == Hash::Identity { - let (hash_len, bytes) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?; - if as_u64(bytes.len()) != u64::from(hash_len) { - return Err(DecodeError::BadInputLength); - } - return Ok(MultihashRef { bytes: input }); - } - - let hash_len = usize::from(alg.size()); - - // Length of input after hash code should be exactly hash_len + 1 - if bytes.len() != hash_len + 1 { - return Err(DecodeError::BadInputLength); - } - - if usize::from(bytes[0]) != hash_len { + let (hash_len, bytes) = + varint_decode::u64(&bytes).map_err(|_| DecodeError::BadInputLength)?; + if (bytes.len() as u64) != hash_len { return Err(DecodeError::BadInputLength); } @@ -287,19 +137,19 @@ impl<'a> MultihashRef<'a> { } /// Returns which hashing algorithm is used in 
this multihash. - pub fn algorithm(&self) -> Hash { - let code = decode::u16(&self.bytes) - .expect("multihash is known to be valid algorithm") - .0; - Hash::from_code(code).expect("multihash is known to be valid") + pub fn algorithm(&self) -> Code { + let (code, _bytes) = + varint_decode::u64(&self.bytes).expect("multihash is known to be valid algorithm"); + Code::from_u64(code) } /// Returns the hashed data. pub fn digest(&self) -> &'a [u8] { - let bytes = decode::u16(&self.bytes) - .expect("multihash is known to be valid digest") - .1; - &bytes[1..] + let (_code, bytes) = + varint_decode::u64(&self.bytes).expect("multihash is known to be valid digest"); + let (_hash_len, bytes) = + varint_decode::u64(&bytes).expect("multihash is known to be a valid digest"); + &bytes[..] } /// Builds a `Multihash` that owns the data. @@ -323,7 +173,31 @@ impl<'a> PartialEq for MultihashRef<'a> { } } -#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] -fn as_u64(a: usize) -> u64 { - a as u64 +/// The `MultihashDigest` trait specifies an interface common for all multihash functions. +pub trait MultihashDigest { + /// The Multihash byte value. + fn code(&self) -> Code; + + /// Hash some input and return the digest. + /// + /// # Panics + /// + /// Panics if the digest length is bigger than 2^32. This only happens for identity hashing. + fn digest(&self, data: &[u8]) -> Multihash; +} + +/// Wraps a hash digest in Multihash with the given Multihash code. +/// +/// The size of the hash is determined by the size of the input hash. If it should be truncated +/// the input data must already be the truncated hash. 
+pub fn wrap(code: &Code, data: &[u8]) -> Multihash { + let mut code_buf = varint_encode::u64_buffer(); + let code = varint_encode::u64(code.to_u64(), &mut code_buf); + + let mut size_buf = varint_encode::u64_buffer(); + let size = varint_encode::u64(data.len() as u64, &mut size_buf); + + Multihash { + storage: Storage::from_slices(&[code, &size, &data]), + } } diff --git a/src/hashes.rs b/src/hashes.rs index 418ae6dc..163a1120 100644 --- a/src/hashes.rs +++ b/src/hashes.rs @@ -1,24 +1,30 @@ -/// List of types currently supported in the multihash spec. -/// -/// Not all hash types are supported by this library. -#[derive(PartialEq, Eq, Clone, Debug, Copy, Hash)] -pub enum Hash { +use blake2b_simd::Params as Blake2b; +use blake2s_simd::Params as Blake2s; +use digest::Digest; +use sha1::Sha1 as Sha1Hasher; +use sha2::{Sha256, Sha512}; +use tiny_keccak::{Hasher, Keccak, Sha3}; + +use crate::digests::{wrap, Multihash, MultihashDigest}; + +#[derive(Clone, Debug, PartialEq)] +pub enum Code { /// Identity (Raw binary ) Identity, /// SHA-1 (20-byte hash size) - SHA1, + Sha1, /// SHA-256 (32-byte hash size) - SHA2256, + Sha2_256, /// SHA-512 (64-byte hash size) - SHA2512, - /// SHA3-512 (64-byte hash size) - SHA3512, - /// SHA3-384 (48-byte hash size) - SHA3384, - /// SHA3-256 (32-byte hash size) - SHA3256, + Sha2_512, /// SHA3-224 (28-byte hash size) - SHA3224, + Sha3_224, + /// SHA3-256 (32-byte hash size) + Sha3_256, + /// SHA3-384 (48-byte hash size) + Sha3_384, + /// SHA3-512 (64-byte hash size) + Sha3_512, /// Keccak-224 (28-byte hash size) Keccak224, /// Keccak-256 (32-byte hash size) @@ -35,76 +41,411 @@ pub enum Hash { Blake2s128, /// BLAKE2s-256 (32-byte hash size) Blake2s256, + /// Make it possible to use a custom code that is not part of the enum yet + Custom(u64), } -impl Hash { - /// Get the corresponding hash code. 
- pub fn code(self) -> u16 { - match self { - Hash::Identity => 0x00, - Hash::SHA1 => 0x11, - Hash::SHA2256 => 0x12, - Hash::SHA2512 => 0x13, - Hash::SHA3224 => 0x17, - Hash::SHA3256 => 0x16, - Hash::SHA3384 => 0x15, - Hash::SHA3512 => 0x14, - Hash::Keccak224 => 0x1A, - Hash::Keccak256 => 0x1B, - Hash::Keccak384 => 0x1C, - Hash::Keccak512 => 0x1D, - Hash::Blake2b256 => 0xB220, - Hash::Blake2b512 => 0xB240, - Hash::Blake2s128 => 0xB250, - Hash::Blake2s256 => 0xB260, +impl Code { + /// Return the code as integer value. + pub fn to_u64(&self) -> u64 { + match *self { + Self::Custom(code) => code, + Self::Identity => 0x00, + Self::Sha1 => 0x11, + Self::Sha2_256 => 0x12, + Self::Sha2_512 => 0x13, + Self::Sha3_224 => 0x17, + Self::Sha3_256 => 0x16, + Self::Sha3_384 => 0x15, + Self::Sha3_512 => 0x14, + Self::Keccak224 => 0x1a, + Self::Keccak256 => 0x1b, + Self::Keccak384 => 0x1c, + Self::Keccak512 => 0x1d, + Self::Blake2b256 => 0xb220, + Self::Blake2b512 => 0xb240, + Self::Blake2s128 => 0xb250, + Self::Blake2s256 => 0xb260, + } + } + + /// Return the `Code` based on the integer value. If the code is unknown/not implemented yet + /// then it returns a `Code::Custom` + /// with that value. + pub fn from_u64(code: u64) -> Self { + match code { + 0x00 => Code::Identity, + 0x11 => Code::Sha1, + 0x12 => Code::Sha2_256, + 0x13 => Code::Sha2_512, + 0x14 => Code::Sha3_512, + 0x15 => Code::Sha3_384, + 0x16 => Code::Sha3_256, + 0x17 => Code::Sha3_224, + 0x1A => Code::Keccak224, + 0x1B => Code::Keccak256, + 0x1C => Code::Keccak384, + 0x1D => Code::Keccak512, + 0xB220 => Code::Blake2b256, + 0xB240 => Code::Blake2b512, + 0xB250 => Code::Blake2s128, + 0xB260 => Code::Blake2s256, + _ => Code::Custom(code), } } - /// Get the hash length in bytes. - pub fn size(self) -> u8 { - match self { - // TODO vmx 2020-01-27: Identity doesn't have a fixed length. 
The `size()` API should - // be removed or renamed to `max_size()` as you can also store truncated hashes with a - // different size. - Hash::Identity => 42, - Hash::SHA1 => 20, - Hash::SHA2256 => 32, - Hash::SHA2512 => 64, - Hash::SHA3224 => 28, - Hash::SHA3256 => 32, - Hash::SHA3384 => 48, - Hash::SHA3512 => 64, - Hash::Keccak224 => 28, - Hash::Keccak256 => 32, - Hash::Keccak384 => 48, - Hash::Keccak512 => 64, - Hash::Blake2b256 => 32, - Hash::Blake2b512 => 64, - Hash::Blake2s128 => 16, - Hash::Blake2s256 => 32, + /// Return the hasher that is used to create a hash with this code. + /// + /// If a custom code is used, `None` is returned. + pub fn hasher(&self) -> Option<Box<dyn MultihashDigest>> { + match *self { + Self::Custom(_) => None, + Self::Identity => Some(Box::new(Identity)), + Self::Sha1 => Some(Box::new(Sha1)), + Self::Sha2_256 => Some(Box::new(Sha2_256)), + Self::Sha2_512 => Some(Box::new(Sha2_512)), + Self::Sha3_224 => Some(Box::new(Sha3_224)), + Self::Sha3_256 => Some(Box::new(Sha3_256)), + Self::Sha3_384 => Some(Box::new(Sha3_384)), + Self::Sha3_512 => Some(Box::new(Sha3_512)), + Self::Keccak224 => Some(Box::new(Keccak224)), + Self::Keccak256 => Some(Box::new(Keccak256)), + Self::Keccak384 => Some(Box::new(Keccak384)), + Self::Keccak512 => Some(Box::new(Keccak512)), + Self::Blake2b256 => Some(Box::new(Blake2b256)), + Self::Blake2b512 => Some(Box::new(Blake2b512)), + Self::Blake2s128 => Some(Box::new(Blake2s128)), + Self::Blake2s256 => Some(Box::new(Blake2s256)), } } +} + +#[derive(Clone, Debug)] +pub struct Identity; +impl MultihashDigest for Identity { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Identity { + pub const CODE: Code = Code::Identity; + pub fn digest(data: &[u8]) -> Multihash { + if (data.len() as u64) >= u64::from(std::u32::MAX) { + panic!("Input data for identity hash is too large, it needs to be less the 2^32.") + } + wrap(&Self::CODE, &data) + } +} + +#[derive(Clone, Debug)]
+pub struct Sha1; +impl MultihashDigest for Sha1 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha1 { + pub const CODE: Code = Code::Sha1; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Sha1Hasher::from(&data).digest().bytes(); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Sha2_256; +impl MultihashDigest for Sha2_256 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha2_256 { + pub const CODE: Code = Code::Sha2_256; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Sha256::digest(&data); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Sha2_512; +impl MultihashDigest for Sha2_512 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha2_512 { + pub const CODE: Code = Code::Sha2_512; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Sha512::digest(&data); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Sha3_224; +impl MultihashDigest for Sha3_224 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha3_224 { + pub const CODE: Code = Code::Sha3_224; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 28]; + let mut sha3 = Sha3::v224(); + sha3.update(&data); + sha3.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Sha3_256; +impl MultihashDigest for Sha3_256 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha3_256 { + pub const CODE: Code = Code::Sha3_256; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 32]; + let mut sha3 = Sha3::v256(); + sha3.update(&data); + sha3.finalize(&mut digest); + 
wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Sha3_384; +impl MultihashDigest for Sha3_384 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha3_384 { + pub const CODE: Code = Code::Sha3_384; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 48]; + let mut sha3 = Sha3::v384(); + sha3.update(&data); + sha3.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Sha3_512; +impl MultihashDigest for Sha3_512 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Sha3_512 { + pub const CODE: Code = Code::Sha3_512; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 64]; + let mut sha3 = Sha3::v512(); + sha3.update(&data); + sha3.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Keccak224; +impl MultihashDigest for Keccak224 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Keccak224 { + pub const CODE: Code = Code::Keccak224; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 28]; + let mut keccak = Keccak::v224(); + keccak.update(&data); + keccak.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} - /// Returns the algorithm corresponding to a code, or `None` if no algorithm is matching. 
- pub fn from_code(code: u16) -> Option { - Some(match code { - 0x00 => Hash::Identity, - 0x11 => Hash::SHA1, - 0x12 => Hash::SHA2256, - 0x13 => Hash::SHA2512, - 0x14 => Hash::SHA3512, - 0x15 => Hash::SHA3384, - 0x16 => Hash::SHA3256, - 0x17 => Hash::SHA3224, - 0x1A => Hash::Keccak224, - 0x1B => Hash::Keccak256, - 0x1C => Hash::Keccak384, - 0x1D => Hash::Keccak512, - 0xB220 => Hash::Blake2b256, - 0xB240 => Hash::Blake2b512, - 0xB250 => Hash::Blake2s128, - 0xB260 => Hash::Blake2s256, - _ => return None, - }) +#[derive(Clone, Debug)] +pub struct Keccak256; +impl MultihashDigest for Keccak256 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Keccak256 { + pub const CODE: Code = Code::Keccak256; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 32]; + let mut keccak = Keccak::v256(); + keccak.update(&data); + keccak.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Keccak384; +impl MultihashDigest for Keccak384 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Keccak384 { + pub const CODE: Code = Code::Keccak384; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 48]; + let mut keccak = Keccak::v384(); + keccak.update(&data); + keccak.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Keccak512; +impl MultihashDigest for Keccak512 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Keccak512 { + pub const CODE: Code = Code::Keccak512; + pub fn digest(data: &[u8]) -> Multihash { + let mut digest = [0; 64]; + let mut keccak = Keccak::v512(); + keccak.update(&data); + keccak.finalize(&mut digest); + wrap(&Self::CODE, &digest) + } +} + +#[derive(Clone, Debug)] +pub struct Blake2b256; +impl MultihashDigest for Blake2b256 { 
+ fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Blake2b256 { + pub const CODE: Code = Code::Blake2b256; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Blake2b::new() + .hash_length(32) + .to_state() + .update(&data) + .finalize(); + wrap(&Self::CODE, &digest.as_bytes()) + } +} + +#[derive(Clone, Debug)] +pub struct Blake2b512; +impl MultihashDigest for Blake2b512 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Blake2b512 { + pub const CODE: Code = Code::Blake2b512; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Blake2b::new() + .hash_length(64) + .to_state() + .update(&data) + .finalize(); + wrap(&Self::CODE, &digest.as_bytes()) + } +} + +#[derive(Clone, Debug)] +pub struct Blake2s128; +impl MultihashDigest for Blake2s128 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Blake2s128 { + pub const CODE: Code = Code::Blake2s128; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Blake2s::new() + .hash_length(16) + .to_state() + .update(&data) + .finalize(); + wrap(&Self::CODE, &digest.as_bytes()) + } +} + +#[derive(Clone, Debug)] +pub struct Blake2s256; +impl MultihashDigest for Blake2s256 { + fn code(&self) -> Code { + Self::CODE + } + fn digest(&self, data: &[u8]) -> Multihash { + Self::digest(data) + } +} +impl Blake2s256 { + pub const CODE: Code = Code::Blake2s256; + pub fn digest(data: &[u8]) -> Multihash { + let digest = Blake2s::new() + .hash_length(32) + .to_state() + .update(&data) + .finalize(); + wrap(&Self::CODE, &digest.as_bytes()) } } diff --git a/src/lib.rs b/src/lib.rs index f9d29b4d..b2a33ea6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,6 @@ mod errors; mod hashes; mod storage; -pub use digests::{encode, Multihash, MultihashRef}; +pub use digests::{wrap, Multihash, MultihashDigest, 
MultihashRef}; pub use errors::{DecodeError, DecodeOwnedError, EncodeError}; -pub use hashes::Hash; +pub use hashes::*; diff --git a/tests/hashes.rs b/tests/hashes.rs new file mode 100644 index 00000000..1f4524f3 --- /dev/null +++ b/tests/hashes.rs @@ -0,0 +1,40 @@ +use multihash::{wrap, Code, Multihash, MultihashDigest, Sha3_512}; + +#[test] +fn to_u64() { + assert_eq!(Code::Keccak256.to_u64(), 0x1b); + assert_eq!(Code::Custom(0x1234).to_u64(), 0x1234); +} + +#[test] +fn from_u64() { + assert_eq!(Code::from_u64(0xb220), Code::Blake2b256); + assert_eq!(Code::from_u64(0x0011_2233), Code::Custom(0x0011_2233)); +} + +#[test] +fn hasher() { + let expected = Sha3_512::digest(b"abcdefg"); + let hasher = Code::Sha3_512.hasher().unwrap(); + assert_eq!(hasher.digest(b"abcdefg"), expected); + assert!(Code::Custom(0x2222).hasher().is_none()); +} + +#[test] +fn custom_multihash_digest() { + #[derive(Clone, Debug)] + struct SameHash; + impl MultihashDigest for SameHash { + fn code(&self) -> Code { + Code::Custom(0x9999) + } + + fn digest(&self, _data: &[u8]) -> Multihash { + let data = b"alwaysthesame"; + wrap(&Self.code(), data) + } + } + + let my_hash = SameHash.digest(b"abc"); + assert_eq!(my_hash.digest(), b"alwaysthesame"); +} diff --git a/tests/lib.rs b/tests/lib.rs index 400b4099..0a32be43 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1,4 +1,4 @@ -use multihash::{encode, Hash, Multihash, MultihashRef}; +use multihash::*; /// Helper function to convert a hex-encoded byte array back into a bytearray fn hex_to_bytes(s: &str) -> Vec<u8> { @@ -15,9 +15,9 @@ macro_rules! assert_encode { {$( $alg:ident, $data:expr, $expect:expr; )*} => { $( assert_eq!( - encode(Hash::$alg, $data).expect("Must be supported").into_bytes(), + $alg::digest($data).into_bytes(), hex_to_bytes($expect), - "{:?} encodes correctly", Hash::$alg + "{:?} encodes correctly", $alg ); )* } @@ -26,15 +26,17 @@ macro_rules! assert_encode { #[test] fn multihash_encode() { assert_encode!
{ + // A hash with a length bigger than 0x80, hence needing 2 bytes to encode the length + Identity, b"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz", "00a1016162636465666768696a6b6c6d6e6f707172737475767778797a206162636465666768696a6b6c6d6e6f707172737475767778797a206162636465666768696a6b6c6d6e6f707172737475767778797a206162636465666768696a6b6c6d6e6f707172737475767778797a206162636465666768696a6b6c6d6e6f707172737475767778797a206162636465666768696a6b6c6d6e6f707172737475767778797a"; Identity, b"beep boop", "00096265657020626f6f70"; - SHA1, b"beep boop", "11147c8357577f51d4f0a8d393aa1aaafb28863d9421"; - SHA2256, b"helloworld", "1220936a185caaa266bb9cbe981e9e05cb78cd732b0b3280eb944412bb6f8f8f07af"; - SHA2256, b"beep boop", "122090ea688e275d580567325032492b597bc77221c62493e76330b85ddda191ef7c"; - SHA2512, b"hello world", "1340309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"; - SHA3224, b"hello world", "171Cdfb7f18c77e928bb56faeb2da27291bd790bc1045cde45f3210bb6c5"; - SHA3256, b"hello world", "1620644bcc7e564373040999aac89e7622f3ca71fba1d972fd94a31c3bfbf24e3938"; - SHA3384, b"hello world", "153083bff28dde1b1bf5810071c6643c08e5b05bdb836effd70b403ea8ea0a634dc4997eb1053aa3593f590f9c63630dd90b"; - SHA3512, b"hello world", "1440840006653e9ac9e95117a15c915caab81662918e925de9e004f774ff82d7079a40d4d27b1b372657c61d46d470304c88c788b3a4527ad074d1dccbee5dbaa99a"; + Sha1, b"beep boop", "11147c8357577f51d4f0a8d393aa1aaafb28863d9421"; + Sha2_256, b"helloworld", "1220936a185caaa266bb9cbe981e9e05cb78cd732b0b3280eb944412bb6f8f8f07af"; + Sha2_256, b"beep boop", "122090ea688e275d580567325032492b597bc77221c62493e76330b85ddda191ef7c"; + Sha2_512, b"hello world", "1340309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"; + 
Sha3_224, b"hello world", "171Cdfb7f18c77e928bb56faeb2da27291bd790bc1045cde45f3210bb6c5"; + Sha3_256, b"hello world", "1620644bcc7e564373040999aac89e7622f3ca71fba1d972fd94a31c3bfbf24e3938"; + Sha3_384, b"hello world", "153083bff28dde1b1bf5810071c6643c08e5b05bdb836effd70b403ea8ea0a634dc4997eb1053aa3593f590f9c63630dd90b"; + Sha3_512, b"hello world", "1440840006653e9ac9e95117a15c915caab81662918e925de9e004f774ff82d7079a40d4d27b1b372657c61d46d470304c88c788b3a4527ad074d1dccbee5dbaa99a"; Keccak224, b"hello world", "1A1C25f3ecfebabe99686282f57f5c9e1f18244cfee2813d33f955aae568"; Keccak256, b"hello world", "1B2047173285a8d7341e5e972fc677286384f802f8ef42a5ec5f03bbfa254cb01fad"; Keccak384, b"hello world", "1C3065fc99339a2a40e99d3c40d695b22f278853ca0f925cde4254bcae5e22ece47e6441f91b6568425adc9d95b0072eb49f"; @@ -52,8 +54,8 @@ macro_rules! assert_decode { let hash = hex_to_bytes($hash); assert_eq!( MultihashRef::from_slice(&hash).unwrap().algorithm(), - Hash::$alg, - "{:?} decodes correctly", Hash::$alg + $alg::CODE, + "{:?} decodes correctly", $alg ); )* } @@ -63,14 +65,14 @@ macro_rules! assert_decode { fn assert_decode() { assert_decode! 
{ Identity, "000a68656c6c6f776f726c64"; - SHA1, "11147c8357577f51d4f0a8d393aa1aaafb28863d9421"; - SHA2256, "1220936a185caaa266bb9cbe981e9e05cb78cd732b0b3280eb944412bb6f8f8f07af"; - SHA2256, "122090ea688e275d580567325032492b597bc77221c62493e76330b85ddda191ef7c"; - SHA2512, "1340309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"; - SHA3224, "171Cdfb7f18c77e928bb56faeb2da27291bd790bc1045cde45f3210bb6c5"; - SHA3256, "1620644bcc7e564373040999aac89e7622f3ca71fba1d972fd94a31c3bfbf24e3938"; - SHA3384, "153083bff28dde1b1bf5810071c6643c08e5b05bdb836effd70b403ea8ea0a634dc4997eb1053aa3593f590f9c63630dd90b"; - SHA3512, "1440840006653e9ac9e95117a15c915caab81662918e925de9e004f774ff82d7079a40d4d27b1b372657c61d46d470304c88c788b3a4527ad074d1dccbee5dbaa99a"; + Sha1, "11147c8357577f51d4f0a8d393aa1aaafb28863d9421"; + Sha2_256, "1220936a185caaa266bb9cbe981e9e05cb78cd732b0b3280eb944412bb6f8f8f07af"; + Sha2_256, "122090ea688e275d580567325032492b597bc77221c62493e76330b85ddda191ef7c"; + Sha2_512, "1340309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"; + Sha3_224, "171Cdfb7f18c77e928bb56faeb2da27291bd790bc1045cde45f3210bb6c5"; + Sha3_256, "1620644bcc7e564373040999aac89e7622f3ca71fba1d972fd94a31c3bfbf24e3938"; + Sha3_384, "153083bff28dde1b1bf5810071c6643c08e5b05bdb836effd70b403ea8ea0a634dc4997eb1053aa3593f590f9c63630dd90b"; + Sha3_512, "1440840006653e9ac9e95117a15c915caab81662918e925de9e004f774ff82d7079a40d4d27b1b372657c61d46d470304c88c788b3a4527ad074d1dccbee5dbaa99a"; Keccak224, "1A1C25f3ecfebabe99686282f57f5c9e1f18244cfee2813d33f955aae568"; Keccak256, "1B2047173285a8d7341e5e972fc677286384f802f8ef42a5ec5f03bbfa254cb01fad"; Keccak384, "1C3065fc99339a2a40e99d3c40d695b22f278853ca0f925cde4254bcae5e22ece47e6441f91b6568425adc9d95b0072eb49f"; @@ -86,10 +88,10 @@ macro_rules! 
assert_roundtrip { ($( $alg:ident ),*) => { $( { - let hash: Vec<u8> = encode(Hash::$alg, b"helloworld").unwrap().into_bytes(); + let hash: Vec<u8> = $alg::digest(b"helloworld").into_bytes(); assert_eq!( MultihashRef::from_slice(&hash).unwrap().algorithm(), - Hash::$alg + $alg::CODE ); } )* @@ -99,40 +101,21 @@ macro_rules! assert_roundtrip { #[test] fn assert_roundtrip() { assert_roundtrip!( - Identity, SHA1, SHA2256, SHA2512, SHA3224, SHA3256, SHA3384, SHA3512, Keccak224, Keccak256, - Keccak384, Keccak512, Blake2b512, Blake2s256 + Identity, Sha1, Sha2_256, Sha2_512, Sha3_224, Sha3_256, Sha3_384, Sha3_512, Keccak224, + Keccak256, Keccak384, Keccak512, Blake2b512, Blake2s256 ); } -#[test] -fn hash_types() { - assert_eq!(Hash::SHA1.size(), 20); - assert_eq!(Hash::SHA2256.size(), 32); - assert_eq!(Hash::SHA2512.size(), 64); - assert_eq!(Hash::SHA3224.size(), 28); - assert_eq!(Hash::SHA3256.size(), 32); - assert_eq!(Hash::SHA3384.size(), 48); - assert_eq!(Hash::SHA3512.size(), 64); - assert_eq!(Hash::Keccak224.size(), 28); - assert_eq!(Hash::Keccak256.size(), 32); - assert_eq!(Hash::Keccak384.size(), 48); - assert_eq!(Hash::Keccak512.size(), 64); - assert_eq!(Hash::Blake2b256.size(), 32); - assert_eq!(Hash::Blake2b512.size(), 64); - assert_eq!(Hash::Blake2s128.size(), 16); - assert_eq!(Hash::Blake2s256.size(), 32); -} - /// Testing the public interface of `Multihash` and `MultihashRef` -fn test_methods(hash: Hash, prefix: &str, digest: &str) { +fn test_methods(hash: impl MultihashDigest, prefix: &str, digest: &str) { let expected_bytes = hex_to_bytes(&format!("{}{}", prefix, digest)); - let multihash = encode(hash, b"hello world").unwrap(); + let multihash = hash.digest(b"hello world"); assert_eq!( Multihash::from_bytes(expected_bytes.clone()).unwrap(), multihash ); assert_eq!(multihash.as_bytes(), &expected_bytes[..]); - assert_eq!(multihash.algorithm(), hash); + assert_eq!(multihash.algorithm(), hash.code()); assert_eq!(multihash.digest(), &hex_to_bytes(digest)[..]); let
multihash_ref = multihash.as_ref(); @@ -153,76 +136,91 @@ fn test_methods(hash: Hash, prefix: &str, digest: &str) { #[test] fn multihash_methods() { - test_methods(Hash::Identity, "000b", "68656c6c6f20776f726c64"); + test_methods(Identity, "000b", "68656c6c6f20776f726c64"); + test_methods(Sha1, "1114", "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"); test_methods( - Hash::SHA1, - "1114", - "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", - ); - test_methods( - Hash::SHA2256, + Sha2_256, "1220", "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9", ); test_methods( - Hash::SHA2512, + Sha2_512, "1340", "309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f"); test_methods( - Hash::SHA3224, + Sha3_224, "171C", "dfb7f18c77e928bb56faeb2da27291bd790bc1045cde45f3210bb6c5", ); test_methods( - Hash::SHA3256, + Sha3_256, "1620", "644bcc7e564373040999aac89e7622f3ca71fba1d972fd94a31c3bfbf24e3938", ); test_methods( - Hash::SHA3384, + Sha3_384, "1530", "83bff28dde1b1bf5810071c6643c08e5b05bdb836effd70b403ea8ea0a634dc4997eb1053aa3593f590f9c63630dd90b"); test_methods( - Hash::SHA3512, + Sha3_512, "1440", "840006653e9ac9e95117a15c915caab81662918e925de9e004f774ff82d7079a40d4d27b1b372657c61d46d470304c88c788b3a4527ad074d1dccbee5dbaa99a"); test_methods( - Hash::Keccak224, + Keccak224, "1A1C", "25f3ecfebabe99686282f57f5c9e1f18244cfee2813d33f955aae568", ); test_methods( - Hash::Keccak256, + Keccak256, "1B20", "47173285a8d7341e5e972fc677286384f802f8ef42a5ec5f03bbfa254cb01fad", ); test_methods( - Hash::Keccak384, + Keccak384, "1C30", "65fc99339a2a40e99d3c40d695b22f278853ca0f925cde4254bcae5e22ece47e6441f91b6568425adc9d95b0072eb49f"); test_methods( - Hash::Keccak512, + Keccak512, "1D40", "3ee2b40047b8060f68c67242175660f4174d0af5c01d47168ec20ed619b0b7c42181f40aa1046f39e2ef9efc6910782a998e0013d172458957957fac9405b67d"); test_methods( - Hash::Blake2b512, + Blake2b512, "c0e40240", 
"021ced8799296ceca557832ab941a50b4a11f83478cf141f51f933f653ab9fbcc05a037cddbed06e309bf334942c4e58cdf1a46e237911ccd7fcf9787cbc7fd0"); test_methods( - Hash::Blake2s256, + Blake2s256, "e0e40220", "9aec6806794561107e594b1f6a8a6b0c92a0cba9acf5e5e93cca06f781813b0b", ); test_methods( - Hash::Blake2b256, + Blake2b256, "a0e40220", "256c83b297114d201b30179f3f0ef0cace9783622da5974326b436178aeef610", ); - test_methods( - Hash::Blake2s128, - "d0e40210", - "37deae0226c30da2ab424a7b8ee14e83", + test_methods(Blake2s128, "d0e40210", "37deae0226c30da2ab424a7b8ee14e83"); +} + +#[test] +fn test_long_identity_hash() { + // A hash with a length bigger than 0x80, hence needing 2 bytes to encode the length + let input = b"abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz"; + let multihash = Identity::digest(input); + assert_eq!(multihash.digest().to_vec(), input.to_vec()); +} + +#[test] +fn custom_multihash() { + let code = Code::Custom(0x1234); + let data = b"abcde".to_vec(); + let multihash = wrap(&code, &data); + + assert_eq!( + multihash.as_bytes(), + &[0xb4, 0x24, 0x05, 0x61, 0x62, 0x63, 0x64, 0x65] ); + assert_eq!(multihash.algorithm(), code); + assert_eq!(multihash.algorithm().to_u64(), 0x1234); + assert_eq!(multihash.digest(), b"abcde"); } #[test] @@ -243,7 +241,7 @@ fn multihash_errors() { Multihash::from_bytes(vec![0x12, 0x20, 0xff]).is_err(), "Should error on correct prefix with wrong digest" ); - let identity_code = Hash::Identity.code() as u8; + let identity_code = Identity::CODE.to_u64() as u8; let identity_length = 3; assert!( Multihash::from_bytes(vec![identity_code, identity_length, 1, 2, 3, 4]).is_err(), @@ -269,7 +267,7 @@ fn multihash_ref_errors() { MultihashRef::from_slice(&[0x12, 0x20, 0xff]).is_err(), "Should error on correct prefix with wrong digest" ); - let identity_code = Hash::Identity.code() as u8; + let identity_code = Identity::CODE.to_u64() as u8; 
let identity_length = 3; assert!( MultihashRef::from_slice(&[identity_code, identity_length, 1, 2, 3, 4]).is_err(), From d185165dc3415a8b3511364d3e22604e39abaf1b Mon Sep 17 00:00:00 2001 From: Volker Mische Date: Fri, 21 Feb 2020 18:10:26 +0100 Subject: [PATCH 4/4] feat: derive `Copy` trait for `Code` Also make `wrap()` take a code instance and not a reference, that makes the API nicer. --- src/digests.rs | 2 +- src/hashes.rs | 34 +++++++++++++++++----------------- tests/hashes.rs | 2 +- tests/lib.rs | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/digests.rs b/src/digests.rs index d6f00d04..7a8c6624 100644 --- a/src/digests.rs +++ b/src/digests.rs @@ -190,7 +190,7 @@ pub trait MultihashDigest { /// /// The size of the hash is determoned by the size of the input hash. If it should be truncated /// the input data must already be the truncated hash. -pub fn wrap(code: &Code, data: &[u8]) -> Multihash { +pub fn wrap(code: Code, data: &[u8]) -> Multihash { let mut code_buf = varint_encode::u64_buffer(); let code = varint_encode::u64(code.to_u64(), &mut code_buf); diff --git a/src/hashes.rs b/src/hashes.rs index 163a1120..7ebdc0bb 100644 --- a/src/hashes.rs +++ b/src/hashes.rs @@ -7,7 +7,7 @@ use tiny_keccak::{Hasher, Keccak, Sha3}; use crate::digests::{wrap, Multihash, MultihashDigest}; -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum Code { /// Identity (Raw binary ) Identity, @@ -136,7 +136,7 @@ impl Identity { if (data.len() as u64) >= u64::from(std::u32::MAX) { panic!("Input data for identity hash is too large, it needs to be less the 2^32.") } - wrap(&Self::CODE, &data) + wrap(Self::CODE, &data) } } @@ -154,7 +154,7 @@ impl Sha1 { pub const CODE: Code = Code::Sha1; pub fn digest(data: &[u8]) -> Multihash { let digest = Sha1Hasher::from(&data).digest().bytes(); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -172,7 +172,7 @@ impl Sha2_256 { pub const CODE: Code = Code::Sha2_256; pub fn 
digest(data: &[u8]) -> Multihash { let digest = Sha256::digest(&data); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -190,7 +190,7 @@ impl Sha2_512 { pub const CODE: Code = Code::Sha2_512; pub fn digest(data: &[u8]) -> Multihash { let digest = Sha512::digest(&data); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -211,7 +211,7 @@ impl Sha3_224 { let mut sha3 = Sha3::v224(); sha3.update(&data); sha3.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -232,7 +232,7 @@ impl Sha3_256 { let mut sha3 = Sha3::v256(); sha3.update(&data); sha3.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -253,7 +253,7 @@ impl Sha3_384 { let mut sha3 = Sha3::v384(); sha3.update(&data); sha3.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -274,7 +274,7 @@ impl Sha3_512 { let mut sha3 = Sha3::v512(); sha3.update(&data); sha3.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -295,7 +295,7 @@ impl Keccak224 { let mut keccak = Keccak::v224(); keccak.update(&data); keccak.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -316,7 +316,7 @@ impl Keccak256 { let mut keccak = Keccak::v256(); keccak.update(&data); keccak.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -337,7 +337,7 @@ impl Keccak384 { let mut keccak = Keccak::v384(); keccak.update(&data); keccak.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -358,7 +358,7 @@ impl Keccak512 { let mut keccak = Keccak::v512(); keccak.update(&data); keccak.finalize(&mut digest); - wrap(&Self::CODE, &digest) + wrap(Self::CODE, &digest) } } @@ -380,7 +380,7 @@ impl Blake2b256 { .to_state() .update(&data) .finalize(); - wrap(&Self::CODE, &digest.as_bytes()) + wrap(Self::CODE, &digest.as_bytes()) } } @@ -402,7 +402,7 @@ impl Blake2b512 { .to_state() .update(&data) 
.finalize(); - wrap(&Self::CODE, &digest.as_bytes()) + wrap(Self::CODE, &digest.as_bytes()) } } @@ -424,7 +424,7 @@ impl Blake2s128 { .to_state() .update(&data) .finalize(); - wrap(&Self::CODE, &digest.as_bytes()) + wrap(Self::CODE, &digest.as_bytes()) } } @@ -446,6 +446,6 @@ impl Blake2s256 { .to_state() .update(&data) .finalize(); - wrap(&Self::CODE, &digest.as_bytes()) + wrap(Self::CODE, &digest.as_bytes()) } } diff --git a/tests/hashes.rs b/tests/hashes.rs index 1f4524f3..a8b03c46 100644 --- a/tests/hashes.rs +++ b/tests/hashes.rs @@ -31,7 +31,7 @@ fn custom_multihash_digest() { fn digest(&self, _data: &[u8]) -> Multihash { let data = b"alwaysthesame"; - wrap(&Self.code(), data) + wrap(Self.code(), data) } } diff --git a/tests/lib.rs b/tests/lib.rs index 0a32be43..fd4aa828 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -212,7 +212,7 @@ fn test_long_identity_hash() { fn custom_multihash() { let code = Code::Custom(0x1234); let data = b"abcde".to_vec(); - let multihash = wrap(&code, &data); + let multihash = wrap(code, &data); assert_eq!( multihash.as_bytes(),