Skip to content

Commit 19eea13

Browse files
committed
feat: Massive refactor with a new API
The new API also makes it possible to wrap an existing hash into a Multihash. This is useful if you e.g. want to create hashes for testing without spending time actually hashing data. You also interact with multicodecs less directly. This should make it easier to use your own multihash implementations without forking the code base. BREAKING CHANGE: There is a new API. When using multihashes, you now import the implementation of a hash, which has a `digest()` function. That function returns a Multihash. New way: use multihash::Sha3_512; let my_multihash = Sha3_512::digest(b"hello world!"); Old way: use multihash::{encode, Hash}; let my_multihash = encode(Hash::SHA3512, b"hello world!");
1 parent fa072c1 commit 19eea13

File tree

6 files changed

+580
-326
lines changed

6 files changed

+580
-326
lines changed

Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@ edition = "2018"
1313
[dependencies]
1414
blake2b_simd = { version = "0.5.9", default-features = false }
1515
blake2s_simd = { version = "0.5.9", default-features = false }
16-
sha1 = "0.5"
17-
sha2 = { version = "0.7", default-features = false }
18-
tiny-keccak = "1.4"
16+
sha1 = "0.6"
17+
sha2 = { version = "0.8", default-features = false }
18+
tiny-keccak = { version = "2.0.0", features = ["keccak", "sha3"] }
1919
unsigned-varint = "0.3"
20+
digest = { version = "0.8", default-features = false }
2021

2122
[dev-dependencies]
2223
quickcheck = "0.9.2"

src/digests.rs

Lines changed: 45 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,143 +1,11 @@
11
use std::convert::TryFrom;
22
use std::{cmp, fmt, hash};
33

4-
use blake2b_simd::{blake2b, Params as Blake2bVariable};
5-
use blake2s_simd::{blake2s, Params as Blake2sVariable};
6-
use sha2::Digest;
7-
use tiny_keccak::Keccak;
8-
use unsigned_varint::{decode, encode};
9-
10-
use crate::errors::{DecodeError, DecodeOwnedError, EncodeError};
11-
use crate::hashes::Hash;
12-
use crate::storage::Storage;
13-
14-
// Helper macro for encoding input into output using sha1, sha2, tiny_keccak, or blake2
15-
macro_rules! encode {
16-
(sha1, Sha1, $input:expr, $output:expr) => {{
17-
let mut hasher = sha1::Sha1::new();
18-
hasher.update($input);
19-
$output.copy_from_slice(&hasher.digest().bytes());
20-
}};
21-
(sha2, $algorithm:ident, $input:expr, $output:expr) => {{
22-
let mut hasher = sha2::$algorithm::default();
23-
hasher.input($input);
24-
$output.copy_from_slice(hasher.result().as_ref());
25-
}};
26-
(tiny, $constructor:ident, $input:expr, $output:expr) => {{
27-
let mut kec = Keccak::$constructor();
28-
kec.update($input);
29-
kec.finalize($output);
30-
}};
31-
(blake2, $algorithm:ident, $input:expr, $output:expr) => {{
32-
let hash = $algorithm($input);
33-
$output.copy_from_slice(hash.as_ref());
34-
}};
35-
(blake2_256, $constructor:ident, $input:expr, $output:expr) => {{
36-
let hash = $constructor::new()
37-
.hash_length(32)
38-
.to_state()
39-
.update($input)
40-
.finalize();
41-
$output.copy_from_slice(hash.as_ref());
42-
}};
43-
(blake2_128, $constructor:ident, $input:expr, $output:expr) => {{
44-
let hash = $constructor::new()
45-
.hash_length(16)
46-
.to_state()
47-
.update($input)
48-
.finalize();
49-
$output.copy_from_slice(hash.as_ref());
50-
}};
51-
}
52-
53-
// And another one to keep the matching DRY
54-
macro_rules! match_encoder {
55-
($hash:ident for ($input:expr, $output:expr) {
56-
$( $hashtype:ident => $lib:ident :: $method:ident, )*
57-
}) => ({
58-
match $hash {
59-
$(
60-
Hash::$hashtype => encode!($lib, $method, $input, $output),
61-
)*
62-
63-
_ => return Err(EncodeError::UnsupportedType)
64-
}
65-
})
66-
}
67-
68-
/// Encodes data into a multihash.
69-
///
70-
/// # Errors
71-
///
72-
/// Will return an error if the specified hash type is not supported. See the docs for `Hash`
73-
/// to see what is supported.
74-
///
75-
/// # Examples
76-
///
77-
/// ```
78-
/// use multihash::{encode, Hash};
79-
///
80-
/// assert_eq!(
81-
/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(),
82-
/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
83-
/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
84-
/// );
85-
/// ```
86-
///
87-
pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
88-
// Custom length encoding for the identity multihash
89-
if let Hash::Identity = hash {
90-
if u64::from(std::u32::MAX) < as_u64(input.len()) {
91-
return Err(EncodeError::UnsupportedInputLength);
92-
}
93-
let mut buf = encode::u16_buffer();
94-
let code = encode::u16(hash.code(), &mut buf);
95-
let mut len_buf = encode::u32_buffer();
96-
let size = encode::u32(input.len() as u32, &mut len_buf);
97-
Ok(Multihash {
98-
storage: Storage::from_slices(&[&code, &size, &input]),
99-
})
100-
} else {
101-
let (offset, mut output) = encode_hash(hash);
102-
match_encoder!(hash for (input, &mut output[offset ..]) {
103-
SHA1 => sha1::Sha1,
104-
SHA2256 => sha2::Sha256,
105-
SHA2512 => sha2::Sha512,
106-
SHA3224 => tiny::new_sha3_224,
107-
SHA3256 => tiny::new_sha3_256,
108-
SHA3384 => tiny::new_sha3_384,
109-
SHA3512 => tiny::new_sha3_512,
110-
Keccak224 => tiny::new_keccak224,
111-
Keccak256 => tiny::new_keccak256,
112-
Keccak384 => tiny::new_keccak384,
113-
Keccak512 => tiny::new_keccak512,
114-
Blake2b512 => blake2::blake2b,
115-
Blake2b256 => blake2_256::Blake2bVariable,
116-
Blake2s256 => blake2::blake2s,
117-
Blake2s128 => blake2_128::Blake2sVariable,
118-
});
119-
120-
Ok(Multihash {
121-
storage: Storage::from_slice(&output),
122-
})
123-
}
124-
}
4+
use unsigned_varint::{decode as varint_decode, encode as varint_encode};
1255

126-
// Encode the given [`Hash`] value and ensure the returned [`Vec<u8>`]
127-
// has enough capacity to hold the actual digest.
128-
fn encode_hash(hash: Hash) -> (usize, Vec<u8>) {
129-
let mut buf = encode::u16_buffer();
130-
let code = encode::u16(hash.code(), &mut buf);
131-
132-
let len = code.len() + 1 + usize::from(hash.size());
133-
134-
let mut output = Vec::with_capacity(len);
135-
output.extend_from_slice(code);
136-
output.push(hash.size());
137-
output.resize(len, 0);
138-
139-
(code.len() + 1, output)
140-
}
6+
use crate::errors::{DecodeError, DecodeOwnedError};
7+
use crate::hashes::Code;
8+
use crate::storage::Storage;
1419

14210
/// Represents a valid multihash.
14311
#[derive(Clone)]
@@ -202,7 +70,7 @@ impl Multihash {
20270
}
20371

20472
/// Returns which hashing algorithm is used in this multihash.
205-
pub fn algorithm(&self) -> Hash {
73+
pub fn algorithm(&self) -> Code {
20674
self.as_ref().algorithm()
20775
}
20876

@@ -257,49 +125,31 @@ impl<'a> MultihashRef<'a> {
257125
return Err(DecodeError::BadInputLength);
258126
}
259127

260-
// Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
261-
std::convert::identity::<fn(Hash) -> u16>(Hash::code);
262-
let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;
263-
264-
let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
128+
let (_code, bytes) = varint_decode::u64(&input).map_err(|_| DecodeError::BadInputLength)?;
265129

266-
// handle the identity case
267-
if alg == Hash::Identity {
268-
let (hash_len, bytes) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?;
269-
if as_u64(bytes.len()) != u64::from(hash_len) {
270-
return Err(DecodeError::BadInputLength);
271-
}
272-
return Ok(MultihashRef { bytes: input });
273-
}
274-
275-
let hash_len = usize::from(alg.size());
276-
277-
// Length of input after hash code should be exactly hash_len + 1
278-
if bytes.len() != hash_len + 1 {
279-
return Err(DecodeError::BadInputLength);
280-
}
281-
282-
if usize::from(bytes[0]) != hash_len {
130+
let (hash_len, bytes) =
131+
varint_decode::u64(&bytes).map_err(|_| DecodeError::BadInputLength)?;
132+
if (bytes.len() as u64) != hash_len {
283133
return Err(DecodeError::BadInputLength);
284134
}
285135

286136
Ok(MultihashRef { bytes: input })
287137
}
288138

289139
/// Returns which hashing algorithm is used in this multihash.
290-
pub fn algorithm(&self) -> Hash {
291-
let code = decode::u16(&self.bytes)
292-
.expect("multihash is known to be valid algorithm")
293-
.0;
294-
Hash::from_code(code).expect("multihash is known to be valid")
140+
pub fn algorithm(&self) -> Code {
141+
let (code, _bytes) =
142+
varint_decode::u64(&self.bytes).expect("multihash is known to be valid algorithm");
143+
Code::from_u64(code)
295144
}
296145

297146
/// Returns the hashed data.
298147
pub fn digest(&self) -> &'a [u8] {
299-
let bytes = decode::u16(&self.bytes)
300-
.expect("multihash is known to be valid digest")
301-
.1;
302-
&bytes[1..]
148+
let (_code, bytes) =
149+
varint_decode::u64(&self.bytes).expect("multihash is known to be valid digest");
150+
let (_hash_len, bytes) =
151+
varint_decode::u64(&bytes).expect("multihash is known to be a valid digest");
152+
&bytes[..]
303153
}
304154

305155
/// Builds a `Multihash` that owns the data.
@@ -323,7 +173,31 @@ impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
323173
}
324174
}
325175

326-
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
327-
fn as_u64(a: usize) -> u64 {
328-
a as u64
176+
/// The `MultihashDigest` trait specifies an interface common for all multihash functions.
177+
pub trait MultihashDigest {
178+
/// The Multihash byte value.
179+
fn code(&self) -> Code;
180+
181+
/// Hash some input and return the digest.
182+
///
183+
/// # Panics
184+
///
185+
/// Panics if the digest length is bigger than 2^32. This only happens for identity hashing.
186+
fn digest(&self, data: &[u8]) -> Multihash;
187+
}
188+
189+
/// Wraps a hash digest in a Multihash with the given Multihash code.
190+
///
191+
/// The size of the hash is determined by the size of the input hash. If it should be truncated,
192+
/// the input data must already be the truncated hash.
193+
pub fn wrap(code: &Code, data: &[u8]) -> Multihash {
194+
let mut code_buf = varint_encode::u64_buffer();
195+
let code = varint_encode::u64(code.to_u64(), &mut code_buf);
196+
197+
let mut size_buf = varint_encode::u64_buffer();
198+
let size = varint_encode::u64(data.len() as u64, &mut size_buf);
199+
200+
Multihash {
201+
storage: Storage::from_slices(&[code, &size, &data]),
202+
}
329203
}

0 commit comments

Comments
 (0)