Skip to content

Commit a301942

Browse files
committed
refactor: move contents from lib.rs to digests.rs
1 parent f669f76 commit a301942

File tree

2 files changed

+311
-307
lines changed

2 files changed

+311
-307
lines changed

src/digests.rs

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
//! # Multihash
2+
//!
3+
//! Implementation of [multihash](https://github.com/multiformats/multihash) in Rust.
4+
//!
5+
//! A `Multihash` is a structure that contains a hashing algorithm, plus some hashed data.
6+
//! A `MultihashRef` is the same as a `Multihash`, except that it doesn't own its data.
7+
//!
8+
9+
use std::convert::TryFrom;
10+
11+
use blake2b_simd::{blake2b, Params as Blake2bVariable};
12+
use blake2s_simd::{blake2s, Params as Blake2sVariable};
13+
use bytes::{BufMut, Bytes, BytesMut};
14+
use sha2::Digest;
15+
use tiny_keccak::Keccak;
16+
use unsigned_varint::{decode, encode};
17+
18+
use crate::errors::{DecodeError, DecodeOwnedError, EncodeError};
19+
use crate::hashes::Hash;
20+
21+
// Helper macro: hashes `$input` with the selected backend (sha1, sha2, tiny_keccak or
// blake2) and writes the digest into `$output`.
//
// `$output` must be a mutable byte slice of exactly the digest length for the arms that
// use `copy_from_slice`, since that call panics on a length mismatch.
macro_rules! encode {
    // SHA-1 via the `sha1` crate.
    (sha1, Sha1, $input:expr, $output:expr) => {{
        let mut state = sha1::Sha1::new();
        state.update($input);
        $output.copy_from_slice(&state.digest().bytes());
    }};
    // Any `sha2` algorithm that can be built with `default()`.
    (sha2, $algorithm:ident, $input:expr, $output:expr) => {{
        let mut state = sha2::$algorithm::default();
        state.input($input);
        let digest = state.result();
        $output.copy_from_slice(digest.as_ref());
    }};
    // SHA-3 / Keccak via `tiny_keccak`; `$constructor` names the `Keccak` constructor.
    (tiny, $constructor:ident, $input:expr, $output:expr) => {{
        let mut sponge = Keccak::$constructor();
        sponge.update($input);
        sponge.finalize($output);
    }};
    // Blake2 through a one-shot hashing function (`blake2b` / `blake2s`).
    (blake2, $algorithm:ident, $input:expr, $output:expr) => {{
        let digest = $algorithm($input);
        $output.copy_from_slice(digest.as_ref());
    }};
    // Blake2 configured with `hash_length(32)` through the params builder.
    (blake2_256, $constructor:ident, $input:expr, $output:expr) => {{
        let mut state = $constructor::new().hash_length(32).to_state();
        state.update($input);
        let digest = state.finalize();
        $output.copy_from_slice(digest.as_ref());
    }};
    // Blake2 configured with `hash_length(16)` through the params builder.
    (blake2_128, $constructor:ident, $input:expr, $output:expr) => {{
        let mut state = $constructor::new().hash_length(16).to_state();
        state.update($input);
        let digest = state.finalize();
        $output.copy_from_slice(digest.as_ref());
    }};
}
59+
60+
// Keeps the hash dispatch DRY: expands to a `match` on `$hash` that forwards every listed
// `Hash` variant to the corresponding `encode!` arm. Any variant that is not listed makes
// the enclosing function return `Err(EncodeError::UnsupportedType)`.
macro_rules! match_encoder {
    ($hash:ident for ($input:expr, $output:expr) {
        $( $variant:ident => $backend:ident :: $target:ident, )*
    }) => {{
        match $hash {
            $(
                Hash::$variant => encode!($backend, $target, $input, $output),
            )*

            _ => return Err(EncodeError::UnsupportedType),
        }
    }};
}
74+
75+
/// Encodes data into a multihash.
76+
///
77+
/// # Errors
78+
///
79+
/// Will return an error if the specified hash type is not supported. See the docs for `Hash`
80+
/// to see what is supported.
81+
///
82+
/// # Examples
83+
///
84+
/// ```
85+
/// use multihash::{encode, Hash};
86+
///
87+
/// assert_eq!(
88+
/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(),
89+
/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
90+
/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
91+
/// );
92+
/// ```
93+
///
94+
pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
95+
// Custom length encoding for the identity multihash
96+
if let Hash::Identity = hash {
97+
if u64::from(std::u32::MAX) < as_u64(input.len()) {
98+
return Err(EncodeError::UnsupportedInputLength);
99+
}
100+
let mut buf = encode::u16_buffer();
101+
let code = encode::u16(hash.code(), &mut buf);
102+
let mut len_buf = encode::u32_buffer();
103+
let size = encode::u32(input.len() as u32, &mut len_buf);
104+
105+
let total_len = code.len() + size.len() + input.len();
106+
107+
let mut output = BytesMut::with_capacity(total_len);
108+
output.put_slice(code);
109+
output.put_slice(size);
110+
output.put_slice(input);
111+
Ok(Multihash {
112+
bytes: output.freeze(),
113+
})
114+
} else {
115+
let (offset, mut output) = encode_hash(hash);
116+
match_encoder!(hash for (input, &mut output[offset ..]) {
117+
SHA1 => sha1::Sha1,
118+
SHA2256 => sha2::Sha256,
119+
SHA2512 => sha2::Sha512,
120+
SHA3224 => tiny::new_sha3_224,
121+
SHA3256 => tiny::new_sha3_256,
122+
SHA3384 => tiny::new_sha3_384,
123+
SHA3512 => tiny::new_sha3_512,
124+
Keccak224 => tiny::new_keccak224,
125+
Keccak256 => tiny::new_keccak256,
126+
Keccak384 => tiny::new_keccak384,
127+
Keccak512 => tiny::new_keccak512,
128+
Blake2b512 => blake2::blake2b,
129+
Blake2b256 => blake2_256::Blake2bVariable,
130+
Blake2s256 => blake2::blake2s,
131+
Blake2s128 => blake2_128::Blake2sVariable,
132+
});
133+
134+
Ok(Multihash {
135+
bytes: output.freeze(),
136+
})
137+
}
138+
}
139+
140+
// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
141+
// has enough capacity to hold the actual digest.
142+
fn encode_hash(hash: Hash) -> (usize, BytesMut) {
143+
let mut buf = encode::u16_buffer();
144+
let code = encode::u16(hash.code(), &mut buf);
145+
146+
let len = code.len() + 1 + usize::from(hash.size());
147+
148+
let mut output = BytesMut::with_capacity(len);
149+
output.put_slice(code);
150+
output.put_u8(hash.size());
151+
output.resize(len, 0);
152+
153+
(code.len() + 1, output)
154+
}
155+
156+
/// Represents a valid multihash.
157+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
158+
pub struct Multihash {
159+
bytes: Bytes,
160+
}
161+
162+
impl Multihash {
163+
/// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`.
164+
pub fn from_bytes(bytes: Vec<u8>) -> Result<Multihash, DecodeOwnedError> {
165+
if let Err(err) = MultihashRef::from_slice(&bytes) {
166+
return Err(DecodeOwnedError {
167+
error: err,
168+
data: bytes,
169+
});
170+
}
171+
Ok(Multihash {
172+
bytes: Bytes::from(bytes),
173+
})
174+
}
175+
176+
/// Returns the bytes representation of the multihash.
177+
pub fn into_bytes(self) -> Vec<u8> {
178+
self.to_vec()
179+
}
180+
181+
/// Returns the bytes representation of the multihash.
182+
pub fn to_vec(&self) -> Vec<u8> {
183+
Vec::from(&self.bytes[..])
184+
}
185+
186+
/// Returns the bytes representation of this multihash.
187+
pub fn as_bytes(&self) -> &[u8] {
188+
&self.bytes
189+
}
190+
191+
/// Builds a `MultihashRef` corresponding to this `Multihash`.
192+
pub fn as_ref(&self) -> MultihashRef {
193+
MultihashRef { bytes: &self.bytes }
194+
}
195+
196+
/// Returns which hashing algorithm is used in this multihash.
197+
pub fn algorithm(&self) -> Hash {
198+
self.as_ref().algorithm()
199+
}
200+
201+
/// Returns the hashed data.
202+
pub fn digest(&self) -> &[u8] {
203+
self.as_ref().digest()
204+
}
205+
}
206+
207+
impl AsRef<[u8]> for Multihash {
208+
fn as_ref(&self) -> &[u8] {
209+
self.as_bytes()
210+
}
211+
}
212+
213+
/// An owned multihash equals a borrowed one when their byte representations match.
impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
    fn eq(&self, other: &MultihashRef<'a>) -> bool {
        self.as_bytes() == other.as_bytes()
    }
}
218+
219+
impl TryFrom<Vec<u8>> for Multihash {
220+
type Error = DecodeOwnedError;
221+
222+
fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
223+
Multihash::from_bytes(value)
224+
}
225+
}
226+
227+
/// Represents a valid multihash.
228+
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
229+
pub struct MultihashRef<'a> {
230+
bytes: &'a [u8],
231+
}
232+
233+
impl<'a> MultihashRef<'a> {
234+
/// Creates a `MultihashRef` from the given `input`.
235+
pub fn from_slice(input: &'a [u8]) -> Result<Self, DecodeError> {
236+
if input.is_empty() {
237+
return Err(DecodeError::BadInputLength);
238+
}
239+
240+
// Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
241+
std::convert::identity::<fn(Hash) -> u16>(Hash::code);
242+
let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;
243+
244+
let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
245+
246+
// handle the identity case
247+
if alg == Hash::Identity {
248+
let (hash_len, bytes) = decode::u32(&bytes).map_err(|_| DecodeError::BadInputLength)?;
249+
if as_u64(bytes.len()) != u64::from(hash_len) {
250+
return Err(DecodeError::BadInputLength);
251+
}
252+
return Ok(MultihashRef { bytes: input });
253+
}
254+
255+
let hash_len = usize::from(alg.size());
256+
257+
// Length of input after hash code should be exactly hash_len + 1
258+
if bytes.len() != hash_len + 1 {
259+
return Err(DecodeError::BadInputLength);
260+
}
261+
262+
if usize::from(bytes[0]) != hash_len {
263+
return Err(DecodeError::BadInputLength);
264+
}
265+
266+
Ok(MultihashRef { bytes: input })
267+
}
268+
269+
/// Returns which hashing algorithm is used in this multihash.
270+
pub fn algorithm(&self) -> Hash {
271+
let code = decode::u16(&self.bytes)
272+
.expect("multihash is known to be valid algorithm")
273+
.0;
274+
Hash::from_code(code).expect("multihash is known to be valid")
275+
}
276+
277+
/// Returns the hashed data.
278+
pub fn digest(&self) -> &'a [u8] {
279+
let bytes = decode::u16(&self.bytes)
280+
.expect("multihash is known to be valid digest")
281+
.1;
282+
&bytes[1..]
283+
}
284+
285+
/// Builds a `Multihash` that owns the data.
286+
///
287+
/// This operation allocates.
288+
pub fn to_owned(&self) -> Multihash {
289+
Multihash {
290+
bytes: Bytes::copy_from_slice(self.bytes),
291+
}
292+
}
293+
294+
/// Returns the bytes representation of this multihash.
295+
pub fn as_bytes(&self) -> &'a [u8] {
296+
&self.bytes
297+
}
298+
}
299+
300+
/// A borrowed multihash equals an owned one when their byte representations match.
impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
    fn eq(&self, other: &Multihash) -> bool {
        self.bytes == other.as_bytes()
    }
}
305+
306+
/// Widens a `usize` to `u64`.
///
/// Only compiled for 32- and 64-bit pointer widths, where `usize` is never wider than
/// `u64`, so the cast below cannot truncate.
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
fn as_u64(a: usize) -> u64 {
    a as u64
}

0 commit comments

Comments
 (0)