diff --git a/misc/multiaddr/Cargo.toml b/misc/multiaddr/Cargo.toml index c30b4504835..49837fd0838 100644 --- a/misc/multiaddr/Cargo.toml +++ b/misc/multiaddr/Cargo.toml @@ -19,9 +19,15 @@ serde = "1.0.70" static_assertions = "1.1" unsigned-varint = "0.3" url = { version = "2.1.0", default-features = false } +smallvec = { version = "1.0", features = ["write"] } [dev-dependencies] bincode = "1" quickcheck = "0.9.0" rand = "0.7.2" serde_json = "1.0" +criterion = "0.3" + +[[bench]] +name = "clone" +harness = false diff --git a/misc/multiaddr/benches/clone.rs b/misc/multiaddr/benches/clone.rs new file mode 100644 index 00000000000..6af4c8917be --- /dev/null +++ b/misc/multiaddr/benches/clone.rs @@ -0,0 +1,52 @@ +// Copyright 2019 Parity Technologies (UK) Ltd. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +//! This benchmark tests the speed of cloning of the largest possible inlined +//! 
multiaddr vs the smallest possible heap allocated multiaddr. +//! +//! Note that the main point of the storage optimization is not to speed up clone +//! but to avoid allocating on the heap at all, but still you see a nice benefit +//! in the speed of cloning. +use criterion::{Bencher, Criterion, criterion_main, criterion_group, black_box}; +use parity_multiaddr::Multiaddr; + +fn do_clone(multiaddr: &Multiaddr) -> usize { + let mut res = 0usize; + for _ in 0..10 { + res += multiaddr.clone().as_ref().len() + } + res +} + +fn clone(bench: &mut Bencher, addr: &Multiaddr) { + bench.iter(|| do_clone(black_box(addr))) +} + +fn criterion_benchmarks(bench: &mut Criterion) { + let inlined: Multiaddr = "/dns4/01234567890123456789123/tcp/80/ws".parse().unwrap(); + let heap: Multiaddr = "/dns4/0123456789012345678901234/tcp/80/ws".parse().unwrap(); + assert_eq!(inlined.as_ref().len(), 30); + assert_eq!(heap.as_ref().len(), 32); + + bench.bench_function("clone 10 max inlined", |b| clone(b, &inlined)); + bench.bench_function("clone 10 min heap", |b| clone(b, &heap)); +} + +criterion_group!(benches, criterion_benchmarks); +criterion_main!(benches); diff --git a/misc/multiaddr/src/lib.rs b/misc/multiaddr/src/lib.rs index f81b8ada763..da5436582a6 100644 --- a/misc/multiaddr/src/lib.rs +++ b/misc/multiaddr/src/lib.rs @@ -6,6 +6,7 @@ mod protocol; mod onion_addr; mod errors; mod from_url; +mod storage; use serde::{ Deserialize, @@ -17,13 +18,13 @@ use serde::{ use std::{ convert::TryFrom, fmt, - io, + hash, iter::FromIterator, net::{IpAddr, Ipv4Addr, Ipv6Addr}, result::Result as StdResult, str::FromStr, - sync::Arc }; +use storage::Storage; pub use self::errors::{Result, Error}; pub use self::from_url::{FromUrlErr, from_url, from_url_lossy}; pub use self::protocol::Protocol; @@ -36,28 +37,23 @@ static_assertions::const_assert! { } /// Representation of a Multiaddr. 
-#[derive(PartialEq, Eq, Clone, Hash)] -pub struct Multiaddr { bytes: Arc> } +#[derive(Clone)] +pub struct Multiaddr { storage: Storage } impl Multiaddr { /// Create a new, empty multiaddress. pub fn empty() -> Self { - Self { bytes: Arc::new(Vec::new()) } - } - - /// Create a new, empty multiaddress with the given capacity. - pub fn with_capacity(n: usize) -> Self { - Self { bytes: Arc::new(Vec::with_capacity(n)) } + Self { storage: Storage::from_slice(&[]) } } /// Return the length in bytes of this multiaddress. pub fn len(&self) -> usize { - self.bytes.len() + self.as_ref().len() } /// Return a copy of this [`Multiaddr`]'s byte representation. pub fn to_vec(&self) -> Vec { - Vec::from(&self.bytes[..]) + self.as_ref().to_vec() } /// Adds an already-parsed address component to the end of this multiaddr. @@ -73,9 +69,9 @@ impl Multiaddr { /// ``` /// pub fn push(&mut self, p: Protocol<'_>) { - let mut w = io::Cursor::<&mut Vec>::new(Arc::make_mut(&mut self.bytes)); - w.set_position(w.get_ref().len() as u64); - p.write_bytes(&mut w).expect("Writing to a `io::Cursor<&mut Vec>` never fails.") + let mut w: smallvec::SmallVec<[u8; 32]> = self.as_ref().into(); + p.write_bytes(&mut w).expect("Writing to a `Buffer` never fails."); + self.storage = Storage::from_slice(&w); } /// Pops the last `Protocol` of this multiaddr, or `None` if the multiaddr is empty. @@ -89,7 +85,7 @@ impl Multiaddr { /// ``` /// pub fn pop<'a>(&mut self) -> Option> { - let mut slice = &self.bytes[..]; // the remaining multiaddr slice + let mut slice = self.as_ref(); // the remaining multiaddr slice if slice.is_empty() { return None } @@ -100,16 +96,14 @@ impl Multiaddr { } slice = s }; - let remaining_len = self.bytes.len() - slice.len(); - Arc::make_mut(&mut self.bytes).truncate(remaining_len); + let remaining_len = self.as_ref().len() - slice.len(); + self.storage = Storage::from_slice(&self.as_ref()[..remaining_len]); Some(protocol) } /// Like [`Multiaddr::push`] but consumes `self`. 
pub fn with(mut self, p: Protocol<'_>) -> Self { - let mut w = io::Cursor::<&mut Vec>::new(Arc::make_mut(&mut self.bytes)); - w.set_position(w.get_ref().len() as u64); - p.write_bytes(&mut w).expect("Writing to a `io::Cursor<&mut Vec>` never fails."); + self.push(p); self } @@ -130,7 +124,7 @@ impl Multiaddr { /// ``` /// pub fn iter(&self) -> Iter<'_> { - Iter(&self.bytes) + Iter(&self.as_ref()) } /// Replace a [`Protocol`] at some position in this `Multiaddr`. @@ -145,7 +139,7 @@ impl Multiaddr { where F: FnOnce(&Protocol) -> Option> { - let mut address = Multiaddr::with_capacity(self.len()); + let mut address = Multiaddr::empty(); let mut fun = Some(by); let mut replaced = false; @@ -192,9 +186,23 @@ impl fmt::Display for Multiaddr { } } +impl PartialEq for Multiaddr { + fn eq(&self, other: &Self) -> bool { + self.as_ref() == other.as_ref() + } +} + +impl Eq for Multiaddr {} + +impl hash::Hash for Multiaddr { + fn hash(&self, state: &mut H) { + self.as_ref().hash(state); + } +} + impl AsRef<[u8]> for Multiaddr { fn as_ref(&self) -> &[u8] { - self.bytes.as_ref() + self.storage.bytes() } } @@ -203,7 +211,7 @@ impl<'a> IntoIterator for &'a Multiaddr { type IntoIter = Iter<'a>; fn into_iter(self) -> Iter<'a> { - Iter(&self.bytes) + Iter(&self.as_ref()) } } @@ -216,7 +224,7 @@ impl<'a> FromIterator> for Multiaddr { for cmp in iter { cmp.write_bytes(&mut writer).expect("Writing to a `Vec` never fails."); } - Multiaddr { bytes: Arc::new(writer) } + Multiaddr { storage: Storage::from_slice(&writer) } } } @@ -237,7 +245,7 @@ impl FromStr for Multiaddr { p.write_bytes(&mut writer).expect("Writing to a `Vec` never fails."); } - Ok(Multiaddr { bytes: Arc::new(writer) }) + Ok(Multiaddr { storage: Storage::from_slice(&writer) }) } } @@ -264,7 +272,7 @@ impl<'a> From> for Multiaddr { fn from(p: Protocol<'a>) -> Multiaddr { let mut w = Vec::new(); p.write_bytes(&mut w).expect("Writing to a `Vec` never fails."); - Multiaddr { bytes: Arc::new(w) } + Multiaddr { storage: 
Storage::from_slice(&w) } } } @@ -299,7 +307,7 @@ impl TryFrom> for Multiaddr { let (_, s) = Protocol::from_bytes(slice)?; slice = s } - Ok(Multiaddr { bytes: Arc::new(v) }) + Ok(Multiaddr { storage: Storage::from_slice(&v) }) } } diff --git a/misc/multiaddr/src/storage.rs b/misc/multiaddr/src/storage.rs new file mode 100644 index 00000000000..0026ccd9902 --- /dev/null +++ b/misc/multiaddr/src/storage.rs @@ -0,0 +1,110 @@ +// Copyright 2020 Parity Technologies (UK) Ltd. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +use std::sync::Arc; + +/// MAX_INLINE is the maximum size of a multiaddr that can be stored inline. +/// There is an overhead of 2 bytes, 1 for the length and 1 for the enum discriminator. +/// 30 is chosen so that the overall size is 32. This should still be big enough to fit +/// a multiaddr containing an ipv4 or ipv6 address and port. 
+/// +/// More complex multiaddrs like those containing peer ids will be stored on the heap. +const MAX_INLINE: usize = 30; + +#[derive(Clone)] +pub(crate) enum Storage { + /// multiaddr is stored inline if it is no larger than MAX_INLINE bytes + Inline(u8, [u8; MAX_INLINE]), + /// multiaddr is stored on the heap. this must be only used if the multiaddr is actually larger than + /// MAX_INLINE bytes to ensure a unique representation. + Heap(Arc<[u8]>), +} + +impl Storage { + /// The raw bytes. + pub fn bytes(&self) -> &[u8] { + match self { + Storage::Inline(len, bytes) => &bytes[..(*len as usize)], + Storage::Heap(data) => &data, + } + } + + /// Creates storage from a slice. + /// For a size up to MAX_INLINE, this will not allocate. + pub fn from_slice(slice: &[u8]) -> Self { + let len = slice.len(); + if len <= MAX_INLINE { + let mut data: [u8; MAX_INLINE] = [0; MAX_INLINE]; + data[..len].copy_from_slice(slice); + Storage::Inline(len as u8, data) + } else { + Storage::Heap(slice.into()) + } + } +} + +#[cfg(test)] +mod tests { + use crate::Multiaddr; + use super::{Storage, MAX_INLINE}; + use quickcheck::quickcheck; + + #[test] + fn multihash_size() { + fn assert_size(ma: &str, n: usize, inline: bool) { + let ma: Multiaddr = ma.parse().unwrap(); + assert_eq!(ma.as_ref().len(), n); + assert_eq!(n <= MAX_INLINE, inline); + } + assert_size("/ip4/127.0.0.1", 5, true); + assert_size("/ip6/2001:8a0:7ac5:4201:3ac9:86ff:fe31:7095/tcp/8000", 20, true); + assert_size("/dns4/0123456789012345678901234/tcp/8000", 30, true); + assert_size("/ip6/2001:8a0:7ac5:4201:3ac9:86ff:fe31:7095/tcp/8000/ws/p2p/QmcgpsyWgH8Y8ajJz1Cu72KnS5uo2Aa2LpzU7kinSupNKC", 59, false); + } + + #[test] + fn struct_size() { + // this should be true for both 32 and 64 bit archs + assert_eq!(std::mem::size_of::(), 32); + } + + #[test] + fn roundtrip() { + // check that .bytes() returns whatever the storage was created with + for i in 0..((MAX_INLINE + 10) as u8) { + let data = (0..i).collect::>(); + let storage = 
Storage::from_slice(&data); + assert_eq!(data, storage.bytes()); + } + } + + fn check_invariants(storage: Storage) -> bool { + match storage { + Storage::Inline(len, _) => len as usize <= MAX_INLINE, + Storage::Heap(arc) => arc.len() > MAX_INLINE, + } + } + + quickcheck! { + fn roundtrip_check(data: Vec) -> bool { + let storage = Storage::from_slice(&data); + storage.bytes() == data.as_slice() && check_invariants(storage) + } + } +}