libp2p · rklaehn · Mar 24, 2020 · Mar 26, 2020 · Mar 26, 2020 · Mar 26, 2020
diff --git a/misc/multiaddr/Cargo.toml b/misc/multiaddr/Cargo.toml
@@ -19,9 +19,15 @@ serde = "1.0.70"
 static_assertions = "1.1"
 unsigned-varint = "0.3"
 url = { version = "2.1.0", default-features = false }
+smallvec = { version = "1.0", features = ["write"] }
 
 [dev-dependencies]
 bincode = "1"
 quickcheck = "0.9.0"
 rand = "0.7.2"
 serde_json = "1.0"
+criterion = "0.3"
+
+[[bench]]
+name = "clone"
+harness = false
diff --git a/misc/multiaddr/benches/clone.rs b/misc/multiaddr/benches/clone.rs
@@ -0,0 +1,52 @@
+// Copyright 2019 Parity Technologies (UK) Ltd.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//! This benchmark tests the speed of cloning of the largest possible inlined
+//! multiaddr vs the smalles possible heap allocated multiaddr.
+//!
+//! Note that the main point of the storage optimization is not to speed up clone
+//! but to avoid allocating on the heap at all, but still you see a nice benefit
+//! in the speed of cloning.
+use criterion::{Bencher, Criterion, criterion_main, criterion_group, black_box};
+use parity_multiaddr::Multiaddr;
+
+fn do_clone(multiaddr: &Multiaddr) -> usize {
+    let mut res = 0usize;
+    for _ in 0..10 {
+        res += multiaddr.clone().as_ref().len()
+    }
+    res
+}
+
+fn clone(bench: &mut Bencher, addr: &Multiaddr) {
+    bench.iter(|| do_clone(black_box(addr)))
+}
+
+fn criterion_benchmarks(bench: &mut Criterion) {
+    let inlined: Multiaddr = "/dns4/01234567890123456789123/tcp/80/ws".parse().unwrap();
+    let heap: Multiaddr = "/dns4/0123456789012345678901234/tcp/80/ws".parse().unwrap();
+    assert_eq!(inlined.as_ref().len(), 30);
+    assert_eq!(heap.as_ref().len(), 32);
+
+    bench.bench_function("clone 10 max inlined", |b| clone(b, &inlined));
+    bench.bench_function("clone 10 min heap", |b| clone(b, &heap));
+}
+
+criterion_group!(benches, criterion_benchmarks);
+criterion_main!(benches);
diff --git a/misc/multiaddr/src/lib.rs b/misc/multiaddr/src/lib.rs
@@ -6,6 +6,7 @@ mod protocol;
 mod onion_addr;
 mod errors;
 mod from_url;
+mod storage;
 
 use serde::{
     Deserialize,
@@ -17,13 +18,13 @@ use serde::{
 use std::{
     convert::TryFrom,
     fmt,
-    io,
+    hash,
     iter::FromIterator,
     net::{IpAddr, Ipv4Addr, Ipv6Addr},
     result::Result as StdResult,
     str::FromStr,
-    sync::Arc
 };
+use storage::Storage;
 pub use self::errors::{Result, Error};
 pub use self::from_url::{FromUrlErr, from_url, from_url_lossy};
 pub use self::protocol::Protocol;
@@ -36,28 +37,23 @@ static_assertions::const_assert! {
 }
 
 /// Representation of a Multiaddr.
-#[derive(PartialEq, Eq, Clone, Hash)]
-pub struct Multiaddr { bytes: Arc<Vec<u8>> }
+#[derive(Clone)]
+pub struct Multiaddr { storage: Storage }
 
 impl Multiaddr {
     /// Create a new, empty multiaddress.
     pub fn empty() -> Self {
-        Self { bytes: Arc::new(Vec::new()) }
-    }
-
-    /// Create a new, empty multiaddress with the given capacity.
-    pub fn with_capacity(n: usize) -> Self {
-        Self { bytes: Arc::new(Vec::with_capacity(n)) }
+        Self { storage: Storage::from_slice(&[]) }
     }
 
     /// Return the length in bytes of this multiaddress.
     pub fn len(&self) -> usize {
-        self.bytes.len()
+        self.as_ref().len()
     }
 
     /// Return a copy of this [`Multiaddr`]'s byte representation.
     pub fn to_vec(&self) -> Vec<u8> {
-        Vec::from(&self.bytes[..])
+        self.as_ref().to_vec()
     }
 
     /// Adds an already-parsed address component to the end of this multiaddr.
@@ -73,9 +69,9 @@ impl Multiaddr {
     /// ```
     ///
     pub fn push(&mut self, p: Protocol<'_>) {
-        let mut w = io::Cursor::<&mut Vec<u8>>::new(Arc::make_mut(&mut self.bytes));
-        w.set_position(w.get_ref().len() as u64);
-        p.write_bytes(&mut w).expect("Writing to a `io::Cursor<&mut Vec<u8>>` never fails.")
+        let mut w: smallvec::SmallVec<[u8; 32]> = self.as_ref().into();
+        p.write_bytes(&mut w).expect("Writing to a `Buffer` never fails.");
+        self.storage = Storage::from_slice(&w);
     }
 
     /// Pops the last `Protocol` of this multiaddr, or `None` if the multiaddr is empty.
@@ -89,7 +85,7 @@ impl Multiaddr {
     /// ```
     ///
     pub fn pop<'a>(&mut self) -> Option<Protocol<'a>> {
-        let mut slice = &self.bytes[..]; // the remaining multiaddr slice
+        let mut slice = self.as_ref(); // the remaining multiaddr slice
         if slice.is_empty() {
             return None
         }
@@ -100,16 +96,14 @@ impl Multiaddr {
             }
             slice = s
         };
-        let remaining_len = self.bytes.len() - slice.len();
-        Arc::make_mut(&mut self.bytes).truncate(remaining_len);
+        let remaining_len = self.as_ref().len() - slice.len();
+        self.storage = Storage::from_slice(&self.as_ref()[..remaining_len]);
         Some(protocol)
     }
 
     /// Like [`Multiaddr::push`] but consumes `self`.
     pub fn with(mut self, p: Protocol<'_>) -> Self {
-        let mut w = io::Cursor::<&mut Vec<u8>>::new(Arc::make_mut(&mut self.bytes));
-        w.set_position(w.get_ref().len() as u64);
-        p.write_bytes(&mut w).expect("Writing to a `io::Cursor<&mut Vec<u8>>` never fails.");
+        self.push(p);
         self
     }
 
@@ -130,7 +124,7 @@ impl Multiaddr {
     /// ```
     ///
     pub fn iter(&self) -> Iter<'_> {
-        Iter(&self.bytes)
+        Iter(&self.as_ref())
     }
 
     /// Replace a [`Protocol`] at some position in this `Multiaddr`.
@@ -145,7 +139,7 @@ impl Multiaddr {
     where
         F: FnOnce(&Protocol) -> Option<Protocol<'a>>
     {
-        let mut address = Multiaddr::with_capacity(self.len());
+        let mut address = Multiaddr::empty();
         let mut fun = Some(by);
         let mut replaced = false;
 
@@ -192,9 +186,23 @@ impl fmt::Display for Multiaddr {
     }
 }
 
+impl PartialEq for Multiaddr {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_ref() == other.as_ref()
+    }
+}
+
+impl Eq for Multiaddr {}
+
+impl hash::Hash for Multiaddr {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        self.as_ref().hash(state);
+    }
+}
+
 impl AsRef<[u8]> for Multiaddr {
     fn as_ref(&self) -> &[u8] {
-        self.bytes.as_ref()
+        self.storage.bytes()
     }
 }
 
@@ -203,7 +211,7 @@ impl<'a> IntoIterator for &'a Multiaddr {
     type IntoIter = Iter<'a>;
 
     fn into_iter(self) -> Iter<'a> {
-        Iter(&self.bytes)
+        Iter(&self.as_ref())
     }
 }
 
@@ -216,7 +224,7 @@ impl<'a> FromIterator<Protocol<'a>> for Multiaddr {
         for cmp in iter {
             cmp.write_bytes(&mut writer).expect("Writing to a `Vec` never fails.");
         }
-        Multiaddr { bytes: Arc::new(writer) }
+        Multiaddr { storage: Storage::from_slice(&writer) }
     }
 }
 
@@ -237,7 +245,7 @@ impl FromStr for Multiaddr {
             p.write_bytes(&mut writer).expect("Writing to a `Vec` never fails.");
         }
 
-        Ok(Multiaddr { bytes: Arc::new(writer) })
+        Ok(Multiaddr { storage: Storage::from_slice(&writer) })
     }
 }
 
@@ -264,7 +272,7 @@ impl<'a> From<Protocol<'a>> for Multiaddr {
     fn from(p: Protocol<'a>) -> Multiaddr {
         let mut w = Vec::new();
         p.write_bytes(&mut w).expect("Writing to a `Vec` never fails.");
-        Multiaddr { bytes: Arc::new(w) }
+        Multiaddr { storage: Storage::from_slice(&w) }
     }
 }
 
@@ -299,7 +307,7 @@ impl TryFrom<Vec<u8>> for Multiaddr {
             let (_, s) = Protocol::from_bytes(slice)?;
             slice = s
         }
-        Ok(Multiaddr { bytes: Arc::new(v) })
+        Ok(Multiaddr { storage: Storage::from_slice(&v) })
     }
 }
 

diff --git a/misc/multiaddr/src/storage.rs b/misc/multiaddr/src/storage.rs
@@ -0,0 +1,110 @@
+// Copyright 2020 Parity Technologies (UK) Ltd.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+use std::sync::Arc;
+
+/// MAX_INLINE is the maximum size of a multiaddr that can be stored inline.
+/// There is an overhead of 2 bytes, 1 for the length and 1 for the enum discriminator.
+/// 30 is chosen so that the overall size is 32. This should still be big enough to fit
+/// a multiaddr containing an ipv4 or ipv6 address and port.
+///
+/// More complex multiaddrs like those containing peer ids will be stored on the heap.
+const MAX_INLINE: usize = 30;
+
+#[derive(Clone)]
+pub(crate) enum Storage {
+    /// hash is stored inline if it is smaller than MAX_INLINE
+    Inline(u8, [u8; MAX_INLINE]),
+    /// hash is stored on the heap. this must be only used if the hash is actually larger than
+    /// MAX_INLINE bytes to ensure a unique representation.
+    Heap(Arc<[u8]>),
+}
+
+impl Storage {
+    /// The raw bytes.
+    pub fn bytes(&self) -> &[u8] {
+        match self {
+            Storage::Inline(len, bytes) => &bytes[..(*len as usize)],
+            Storage::Heap(data) => &data,
+        }
+    }
+
+    /// Creates storage from a slice.
+    /// For a size up to MAX_INLINE, this will not allocate.
+    pub fn from_slice(slice: &[u8]) -> Self {
+        let len = slice.len();
+        if len <= MAX_INLINE {
+            let mut data: [u8; MAX_INLINE] = [0; MAX_INLINE];
+            data[..len].copy_from_slice(slice);
+            Storage::Inline(len as u8, data)
+        } else {
+            Storage::Heap(slice.into())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::Multiaddr;
+    use super::{Storage, MAX_INLINE};
+    use quickcheck::quickcheck;
+
+    #[test]
+    fn multihash_size() {
+        fn assert_size(ma: &str, n: usize, inline: bool) {
+            let ma: Multiaddr = ma.parse().unwrap();
+            assert_eq!(ma.as_ref().len(), n);
+            assert_eq!(n <= MAX_INLINE, inline);
+        }
+        assert_size("/ip4/127.0.0.1", 5, true);
+        assert_size("/ip6/2001:8a0:7ac5:4201:3ac9:86ff:fe31:7095/tcp/8000", 20, true);
+        assert_size("/dns4/0123456789012345678901234/tcp/8000", 30, true);
+        assert_size("/ip6/2001:8a0:7ac5:4201:3ac9:86ff:fe31:7095/tcp/8000/ws/p2p/QmcgpsyWgH8Y8ajJz1Cu72KnS5uo2Aa2LpzU7kinSupNKC", 59, false);
+    }
+
+    #[test]
+    fn struct_size() {
+        // this should be true for both 32 and 64 bit archs
+        assert_eq!(std::mem::size_of::<Storage>(), 32);
+    }
+
+    #[test]
+    fn roundtrip() {
+        // check that .bytes() returns whatever the storage was created with
+        for i in 0..((MAX_INLINE + 10) as u8) {
+            let data = (0..i).collect::<Vec<u8>>();
+            let storage = Storage::from_slice(&data);
+            assert_eq!(data, storage.bytes());
+        }
+    }
+
+    fn check_invariants(storage: Storage) -> bool {
+        match storage {
+            Storage::Inline(len, _) => len as usize <= MAX_INLINE,
+            Storage::Heap(arc) => arc.len() > MAX_INLINE,
+        }
+    }
+
+    quickcheck! {
+        fn roundtrip_check(data: Vec<u8>) -> bool {
+            let storage = Storage::from_slice(&data);
+            storage.bytes() == data.as_slice() && check_invariants(storage)
+        }
+    }
+}