Skip to content

Commit aa54ac9

Browse files
committed
feat: Massive refactor with a new API
The new API also makes it possible to wrap an existing hash into a Multihash. This is useful if you, e.g., want to create hashes for testing without spending time actually hashing data. You also interact with multicodecs less directly, which should make it easier to use your own multihash implementations without forking the code base.

BREAKING CHANGE: There is a new API. When using multihashes, you now import the implementation of the hash you want, which has a `digest()` function. That function returns a Multihash.

New way:

    use multihash::{MultihashDigest, Sha3_512};
    let my_multihash = Sha3_512::digest(b"hello world!");

Old way:

    use multihash::{encode, Hash};
    let my_multihash = encode(Hash::SHA3512, b"hello world!");
1 parent a301942 commit aa54ac9

File tree

5 files changed

+373
-340
lines changed

5 files changed

+373
-340
lines changed

Cargo.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@ edition = "2018"
1414
blake2b_simd = { version = "0.5.9", default-features = false }
1515
blake2s_simd = { version = "0.5.9", default-features = false }
1616
bytes = "0.5"
17-
sha1 = "0.5"
18-
sha2 = { version = "0.7", default-features = false }
19-
tiny-keccak = "1.4"
17+
sha1 = "0.6"
18+
sha2 = { version = "0.8", default-features = false }
19+
tiny-keccak = { version = "2.0.0", features = ["keccak", "sha3"] }
2020
unsigned-varint = "0.3"
21+
digest = { version = "0.8", default-features = false }
22+
enum-primitive-derive = "0.1.2"
23+
num-traits = "0.2.11"

src/digests.rs

Lines changed: 74 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -8,150 +8,12 @@
88
99
use std::convert::TryFrom;
1010

11-
use blake2b_simd::{blake2b, Params as Blake2bVariable};
12-
use blake2s_simd::{blake2s, Params as Blake2sVariable};
1311
use bytes::{BufMut, Bytes, BytesMut};
14-
use sha2::Digest;
15-
use tiny_keccak::Keccak;
16-
use unsigned_varint::{decode, encode};
17-
18-
use crate::errors::{DecodeError, DecodeOwnedError, EncodeError};
19-
use crate::hashes::Hash;
20-
21-
// Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2
22-
macro_rules! encode {
23-
(sha1, Sha1, $input:expr, $output:expr) => {{
24-
let mut hasher = sha1::Sha1::new();
25-
hasher.update($input);
26-
$output.copy_from_slice(&hasher.digest().bytes());
27-
}};
28-
(sha2, $algorithm:ident, $input:expr, $output:expr) => {{
29-
let mut hasher = sha2::$algorithm::default();
30-
hasher.input($input);
31-
$output.copy_from_slice(hasher.result().as_ref());
32-
}};
33-
(tiny, $constructor:ident, $input:expr, $output:expr) => {{
34-
let mut kec = Keccak::$constructor();
35-
kec.update($input);
36-
kec.finalize($output);
37-
}};
38-
(blake2, $algorithm:ident, $input:expr, $output:expr) => {{
39-
let hash = $algorithm($input);
40-
$output.copy_from_slice(hash.as_ref());
41-
}};
42-
(blake2_256, $constructor:ident, $input:expr, $output:expr) => {{
43-
let hash = $constructor::new()
44-
.hash_length(32)
45-
.to_state()
46-
.update($input)
47-
.finalize();
48-
$output.copy_from_slice(hash.as_ref());
49-
}};
50-
(blake2_128, $constructor:ident, $input:expr, $output:expr) => {{
51-
let hash = $constructor::new()
52-
.hash_length(16)
53-
.to_state()
54-
.update($input)
55-
.finalize();
56-
$output.copy_from_slice(hash.as_ref());
57-
}};
58-
}
59-
60-
// And another one to keep the matching DRY
61-
macro_rules! match_encoder {
62-
($hash:ident for ($input:expr, $output:expr) {
63-
$( $hashtype:ident => $lib:ident :: $method:ident, )*
64-
}) => ({
65-
match $hash {
66-
$(
67-
Hash::$hashtype => encode!($lib, $method, $input, $output),
68-
)*
69-
70-
_ => return Err(EncodeError::UnsupportedType)
71-
}
72-
})
73-
}
74-
75-
/// Encodes data into a multihash.
76-
///
77-
/// # Errors
78-
///
79-
/// Will return an error if the specified hash type is not supported. See the docs for `Hash`
80-
/// to see what is supported.
81-
///
82-
/// # Examples
83-
///
84-
/// ```
85-
/// use multihash::{encode, Hash};
86-
///
87-
/// assert_eq!(
88-
/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(),
89-
/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
90-
/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
91-
/// );
92-
/// ```
93-
///
94-
pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
95-
// Custom length encoding for the identity multihash
96-
if let Hash::Identity = hash {
97-
if u64::from(std::u32::MAX) < as_u64(input.len()) {
98-
return Err(EncodeError::UnsupportedInputLength);
99-
}
100-
let mut buf = encode::u16_buffer();
101-
let code = encode::u16(hash.code(), &mut buf);
102-
let mut len_buf = encode::u32_buffer();
103-
let size = encode::u32(input.len() as u32, &mut len_buf);
104-
105-
let total_len = code.len() + size.len() + input.len();
106-
107-
let mut output = BytesMut::with_capacity(total_len);
108-
output.put_slice(code);
109-
output.put_slice(size);
110-
output.put_slice(input);
111-
Ok(Multihash {
112-
bytes: output.freeze(),
113-
})
114-
} else {
115-
let (offset, mut output) = encode_hash(hash);
116-
match_encoder!(hash for (input, &mut output[offset ..]) {
117-
SHA1 => sha1::Sha1,
118-
SHA2256 => sha2::Sha256,
119-
SHA2512 => sha2::Sha512,
120-
SHA3224 => tiny::new_sha3_224,
121-
SHA3256 => tiny::new_sha3_256,
122-
SHA3384 => tiny::new_sha3_384,
123-
SHA3512 => tiny::new_sha3_512,
124-
Keccak224 => tiny::new_keccak224,
125-
Keccak256 => tiny::new_keccak256,
126-
Keccak384 => tiny::new_keccak384,
127-
Keccak512 => tiny::new_keccak512,
128-
Blake2b512 => blake2::blake2b,
129-
Blake2b256 => blake2_256::Blake2bVariable,
130-
Blake2s256 => blake2::blake2s,
131-
Blake2s128 => blake2_128::Blake2sVariable,
132-
});
133-
134-
Ok(Multihash {
135-
bytes: output.freeze(),
136-
})
137-
}
138-
}
12+
use num_traits::cast::FromPrimitive;
13+
use unsigned_varint::{decode as varint_decode, encode as varint_encode};
13914

140-
// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
141-
// has enough capacity to hold the actual digest.
142-
fn encode_hash(hash: Hash) -> (usize, BytesMut) {
143-
let mut buf = encode::u16_buffer();
144-
let code = encode::u16(hash.code(), &mut buf);
145-
146-
let len = code.len() + 1 + usize::from(hash.size());
147-
148-
let mut output = BytesMut::with_capacity(len);
149-
output.put_slice(code);
150-
output.put_u8(hash.size());
151-
output.resize(len, 0);
152-
153-
(code.len() + 1, output)
154-
}
15+
use crate::errors::{DecodeError, DecodeOwnedError};
16+
use crate::hashes::Code;
15517

15618
/// Represents a valid multihash.
15719
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -194,7 +56,7 @@ impl Multihash {
19456
}
19557

19658
/// Returns which hashing algorithm is used in this multihash.
197-
pub fn algorithm(&self) -> Hash {
59+
pub fn algorithm(&self) -> Code {
19860
self.as_ref().algorithm()
19961
}
20062

@@ -237,46 +99,28 @@ impl<'a> MultihashRef<'a> {
23799
return Err(DecodeError::BadInputLength);
238100
}
239101

240-
// Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
241-
std::convert::identity::<fn(Hash) -> u16>(Hash::code);
242-
let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;
243-
244-
let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
102+
let (_code, bytes) = varint_decode::u64(&input).map_err(|_| DecodeError::BadInputLength)?;
245103

246-
// handle the identity case
247-
if alg == Hash::Identity {
248-
let (hash_len, bytes) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?;
249-
if as_u64(bytes.len()) != u64::from(hash_len) {
250-
return Err(DecodeError::BadInputLength);
251-
}
252-
return Ok(MultihashRef { bytes: input });
253-
}
254-
255-
let hash_len = usize::from(alg.size());
256-
257-
// Length of input after hash code should be exactly hash_len + 1
258-
if bytes.len() != hash_len + 1 {
259-
return Err(DecodeError::BadInputLength);
260-
}
261-
262-
if usize::from(bytes[0]) != hash_len {
104+
let (hash_len, bytes) =
105+
varint_decode::u64(&bytes).map_err(|_| DecodeError::BadInputLength)?;
106+
if (bytes.len() as u64) != hash_len {
263107
return Err(DecodeError::BadInputLength);
264108
}
265109

266110
Ok(MultihashRef { bytes: input })
267111
}
268112

269113
/// Returns which hashing algorithm is used in this multihash.
270-
pub fn algorithm(&self) -> Hash {
271-
let code = decode::u16(&self.bytes)
114+
pub fn algorithm(&self) -> Code {
115+
let code = varint_decode::u64(&self.bytes)
272116
.expect("multihash is known to be valid algorithm")
273117
.0;
274-
Hash::from_code(code).expect("multihash is known to be valid")
118+
Code::from_u64(code).expect("multihash is known to be valid")
275119
}
276120

277121
/// Returns the hashed data.
278122
pub fn digest(&self) -> &'a [u8] {
279-
let bytes = decode::u16(&self.bytes)
123+
let bytes = varint_decode::u16(&self.bytes)
280124
.expect("multihash is known to be valid digest")
281125
.1;
282126
&bytes[1..]
@@ -303,7 +147,65 @@ impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
303147
}
304148
}
305149

306-
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
307-
fn as_u64(a: usize) -> u64 {
308-
a as u64
150+
/// The `MultihashDigest` trait specifies an interface common for all multihash functions.
151+
pub trait MultihashDigest {
152+
/// The Multihash byte value.
153+
const CODE: u64;
154+
155+
/// Hash some input and return the digest.
156+
///
157+
/// # Panics
158+
///
159+
/// Panics if the digest length is bigger than 2^32. This only happens for identity hashing.
160+
fn digest(data: &[u8]) -> Multihash;
161+
162+
//fn dyn_digest(&self, data: &[u8]) -> Multihash {
163+
// Self::digest(data)
164+
//}
165+
}
166+
167+
/// The `DynMultihashDigest` trait is a variant of the `MultihashDigest` that can be used as a trait
168+
/// object.
169+
pub trait DynMultihashDigest {
170+
/// The Multihash byte value.
171+
fn code(&self) -> u64;
172+
173+
/// Hash some input and return the digest.
174+
///
175+
/// # Panics
176+
///
177+
/// Panics if the digest length is bigger than 2^32. This only happens for identity hashing.
178+
fn digest(&self, data: &[u8]) -> Multihash;
179+
}
180+
181+
impl<T: MultihashDigest + ?Sized> DynMultihashDigest for T {
182+
fn code(&self) -> u64 {
183+
Self::CODE
184+
}
185+
fn digest(&self, data: &[u8]) -> Multihash {
186+
Self::digest(data)
187+
}
188+
}
189+
190+
/// Wraps a hash digest in Multihash with the given Multihash code.
191+
///
192+
/// The size of the hash is determined by the size of the input hash. If it should be truncated,
193+
/// the input data must already be the truncated hash.
194+
pub fn wrap(code: u64, data: &[u8]) -> Multihash {
195+
let mut code_buf = varint_encode::u64_buffer();
196+
let code_varint = varint_encode::u64(code, &mut code_buf);
197+
198+
let mut size_buf = varint_encode::u64_buffer();
199+
let size_varint = varint_encode::u64(data.len() as u64, &mut size_buf);
200+
201+
let len = code_varint.len() + size_varint.len();
202+
203+
let mut output = BytesMut::with_capacity(len);
204+
output.put_slice(code_varint);
205+
output.put_slice(size_varint);
206+
output.put_slice(data);
207+
208+
Multihash {
209+
bytes: output.freeze(),
210+
}
309211
}

0 commit comments

Comments
 (0)