Skip to content

Commit 4c22240

Browse files
twittner authored and vmx committed
multihash: Use Bytes instead of Vec<u8> internally. (#1187)
multihash: Use `Bytes` instead of `Vec<u8>` internally. To improve the efficiency of cloning multi-hashes (e.g. as the representation of `PeerId`s), this PR replaces the `Vec<u8>` representation with `Bytes`. The API is kept backwards compatible and does not leak the representation type. Originally from libp2p/rust-libp2p@8af4a28.
1 parent cd7b98a commit 4c22240

File tree

2 files changed

+48
-40
lines changed

2 files changed

+48
-40
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@ edition = "2018"
2020
[dependencies]
2121
blake2b_simd = { version = "0.5.9", default-features = false }
2222
blake2s_simd = { version = "0.5.9", default-features = false }
23+
bytes = "0.5"
2324
sha1 = "0.5"
2425
sha2 = { version = "0.7", default-features = false }
2526
tiny-keccak = "1.4"
26-
unsigned-varint = "0.2"
27+
unsigned-varint = "0.3"

src/lib.rs

Lines changed: 46 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ use std::convert::TryFrom;
1313

1414
use blake2b_simd::blake2b;
1515
use blake2s_simd::blake2s;
16+
use bytes::{BufMut, Bytes, BytesMut};
1617
use sha2::Digest;
1718
use tiny_keccak::Keccak;
1819
use unsigned_varint::{decode, encode};
@@ -71,25 +72,15 @@ macro_rules! match_encoder {
7172
/// use multihash::{encode, Hash};
7273
///
7374
/// assert_eq!(
74-
/// encode(Hash::SHA2256, b"hello world").unwrap().into_bytes(),
75+
/// encode(Hash::SHA2256, b"hello world").unwrap().to_vec(),
7576
/// vec![18, 32, 185, 77, 39, 185, 147, 77, 62, 8, 165, 46, 82, 215, 218, 125, 171, 250, 196,
7677
/// 132, 239, 227, 122, 83, 128, 238, 144, 136, 247, 172, 226, 239, 205, 233]
7778
/// );
7879
/// ```
7980
///
8081
pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
81-
let mut buf = encode::u16_buffer();
82-
let code = encode::u16(hash.code(), &mut buf);
83-
84-
let header_len = code.len() + 1;
85-
let size = hash.size();
86-
87-
let mut output = Vec::new();
88-
output.resize(header_len + size as usize, 0);
89-
output[..code.len()].copy_from_slice(code);
90-
output[code.len()] = size;
91-
92-
match_encoder!(hash for (input, &mut output[header_len..]) {
82+
let (offset, mut output) = encode_hash(hash);
83+
match_encoder!(hash for (input, &mut output[offset ..]) {
9384
SHA1 => sha1::Sha1,
9485
SHA2256 => sha2::Sha256,
9586
SHA2512 => sha2::Sha512,
@@ -105,55 +96,73 @@ pub fn encode(hash: Hash, input: &[u8]) -> Result<Multihash, EncodeError> {
10596
Blake2s256 => blake2::blake2s,
10697
});
10798

108-
Ok(Multihash { bytes: output })
99+
Ok(Multihash {
100+
bytes: output.freeze(),
101+
})
102+
}
103+
104+
// Encode the given [`Hash`] value and ensure the returned [`BytesMut`]
105+
// has enough capacity to hold the actual digest.
106+
fn encode_hash(hash: Hash) -> (usize, BytesMut) {
107+
let mut buf = encode::u16_buffer();
108+
let code = encode::u16(hash.code(), &mut buf);
109+
110+
let len = code.len() + 1 + usize::from(hash.size());
111+
112+
let mut output = BytesMut::with_capacity(len);
113+
output.put_slice(code);
114+
output.put_u8(hash.size());
115+
output.resize(len, 0);
116+
117+
(code.len() + 1, output)
109118
}
110119

111120
/// Represents a valid multihash.
112121
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
113122
pub struct Multihash {
114-
bytes: Vec<u8>,
123+
bytes: Bytes,
115124
}
116125

117126
impl Multihash {
118127
/// Verifies whether `bytes` contains a valid multihash, and if so returns a `Multihash`.
119-
#[inline]
120128
pub fn from_bytes(bytes: Vec<u8>) -> Result<Multihash, DecodeOwnedError> {
121129
if let Err(err) = MultihashRef::from_slice(&bytes) {
122130
return Err(DecodeOwnedError {
123131
error: err,
124132
data: bytes,
125133
});
126134
}
127-
128-
Ok(Multihash { bytes })
135+
Ok(Multihash {
136+
bytes: Bytes::from(bytes),
137+
})
129138
}
130139

131140
/// Returns the bytes representation of the multihash.
132-
#[inline]
133141
pub fn into_bytes(self) -> Vec<u8> {
134-
self.bytes
142+
self.to_vec()
143+
}
144+
145+
/// Returns the bytes representation of the multihash.
146+
pub fn to_vec(&self) -> Vec<u8> {
147+
Vec::from(&self.bytes[..])
135148
}
136149

137150
/// Returns the bytes representation of this multihash.
138-
#[inline]
139151
pub fn as_bytes(&self) -> &[u8] {
140152
&self.bytes
141153
}
142154

143155
/// Builds a `MultihashRef` corresponding to this `Multihash`.
144-
#[inline]
145156
pub fn as_ref(&self) -> MultihashRef {
146157
MultihashRef { bytes: &self.bytes }
147158
}
148159

149160
/// Returns which hashing algorithm is used in this multihash.
150-
#[inline]
151161
pub fn algorithm(&self) -> Hash {
152162
self.as_ref().algorithm()
153163
}
154164

155165
/// Returns the hashed data.
156-
#[inline]
157166
pub fn digest(&self) -> &[u8] {
158167
self.as_ref().digest()
159168
}
@@ -166,7 +175,6 @@ impl AsRef<[u8]> for Multihash {
166175
}
167176

168177
impl<'a> PartialEq<MultihashRef<'a>> for Multihash {
169-
#[inline]
170178
fn eq(&self, other: &MultihashRef<'a>) -> bool {
171179
&*self.bytes == other.bytes
172180
}
@@ -187,64 +195,63 @@ pub struct MultihashRef<'a> {
187195
}
188196

189197
impl<'a> MultihashRef<'a> {
190-
/// Verifies whether `bytes` contains a valid multihash, and if so returns a `MultihashRef`.
191-
pub fn from_slice(input: &'a [u8]) -> Result<MultihashRef<'a>, DecodeError> {
198+
/// Creates a `MultihashRef` from the given `input`.
199+
pub fn from_slice(input: &'a [u8]) -> Result<Self, DecodeError> {
192200
if input.is_empty() {
193201
return Err(DecodeError::BadInputLength);
194202
}
195203

196-
// NOTE: We choose u16 here because there is no hashing algorithm implemented in this crate
197-
// whose length exceeds 2^16 - 1.
204+
// Ensure `Hash::code` returns a `u16` so that our `decode::u16` here is correct.
205+
std::convert::identity::<fn(Hash) -> u16>(Hash::code);
198206
let (code, bytes) = decode::u16(&input).map_err(|_| DecodeError::BadInputLength)?;
199207

200208
let alg = Hash::from_code(code).ok_or(DecodeError::UnknownCode)?;
201-
let hash_len = alg.size() as usize;
209+
let hash_len = usize::from(alg.size());
202210

203211
// Length of input after hash code should be exactly hash_len + 1
204212
if bytes.len() != hash_len + 1 {
205213
return Err(DecodeError::BadInputLength);
206214
}
207215

208-
if bytes[0] as usize != hash_len {
216+
if usize::from(bytes[0]) != hash_len {
209217
return Err(DecodeError::BadInputLength);
210218
}
211219

212220
Ok(MultihashRef { bytes: input })
213221
}
214222

215223
/// Returns which hashing algorithm is used in this multihash.
216-
#[inline]
217224
pub fn algorithm(&self) -> Hash {
218-
let (code, _) = decode::u16(&self.bytes).expect("multihash is known to be valid algorithm");
225+
let code = decode::u16(&self.bytes)
226+
.expect("multihash is known to be valid algorithm")
227+
.0;
219228
Hash::from_code(code).expect("multihash is known to be valid")
220229
}
221230

222231
/// Returns the hashed data.
223-
#[inline]
224232
pub fn digest(&self) -> &'a [u8] {
225-
let (_, bytes) = decode::u16(&self.bytes).expect("multihash is known to be valid digest");
233+
let bytes = decode::u16(&self.bytes)
234+
.expect("multihash is known to be valid digest")
235+
.1;
226236
&bytes[1..]
227237
}
228238

229239
/// Builds a `Multihash` that owns the data.
230240
///
231241
/// This operation allocates.
232-
#[inline]
233242
pub fn to_owned(&self) -> Multihash {
234243
Multihash {
235-
bytes: self.bytes.to_owned(),
244+
bytes: Bytes::copy_from_slice(self.bytes),
236245
}
237246
}
238247

239248
/// Returns the bytes representation of this multihash.
240-
#[inline]
241249
pub fn as_bytes(&self) -> &'a [u8] {
242250
&self.bytes
243251
}
244252
}
245253

246254
impl<'a> PartialEq<Multihash> for MultihashRef<'a> {
247-
#[inline]
248255
fn eq(&self, other: &Multihash) -> bool {
249256
self.bytes == &*other.bytes
250257
}

0 commit comments

Comments
 (0)