Merge pull request #29 from ecstatic-morse/better-merge

nikomatsakis · web-flow · commit 43b20080fbdc · 2021-05-04T17:24:45.000-04:00
Speed up `Relation::merge`
diff --git a/src/lib.rs b/src/lib.rs
@@ -12,13 +12,13 @@
 #![forbid(missing_docs)]
 
 use std::cell::RefCell;
-use std::cmp::Ordering;
 use std::io::Write;
 use std::iter::FromIterator;
 use std::rc::Rc;
 
 mod join;
 mod map;
+mod merge;
 mod test;
 mod treefrog;
 pub use crate::join::JoinInput;
@@ -45,63 +45,7 @@ pub struct Relation<Tuple: Ord> {
 impl<Tuple: Ord> Relation<Tuple> {
     /// Merges two relations into their union.
     pub fn merge(self, other: Self) -> Self {
-        let Relation {
-            elements: mut elements1,
-        } = self;
-        let Relation {
-            elements: mut elements2,
-        } = other;
-
-        // If one of the element lists is zero-length, we don't need to do any work
-        if elements1.is_empty() {
-            return Relation {
-                elements: elements2,
-            };
-        }
-
-        if elements2.is_empty() {
-            return Relation {
-                elements: elements1,
-            };
-        }
-
-        // Make sure that elements1 starts with the lower element
-        // Will not panic since both collections must have at least 1 element at this point
-        if elements1[0] > elements2[0] {
-            std::mem::swap(&mut elements1, &mut elements2);
-        }
-
-        // Fast path for when all the new elements are after the exiting ones
-        if elements1[elements1.len() - 1] < elements2[0] {
-            elements1.extend(elements2.into_iter());
-            // println!("fast path");
-            return Relation {
-                elements: elements1,
-            };
-        }
-
-        let mut elements = Vec::with_capacity(elements1.len() + elements2.len());
-        let mut elements1 = elements1.drain(..);
-        let mut elements2 = elements2.drain(..).peekable();
-
-        elements.push(elements1.next().unwrap());
-        if elements.first() == elements2.peek() {
-            elements2.next();
-        }
-
-        for elem in elements1 {
-            while elements2.peek().map(|x| x.cmp(&elem)) == Some(Ordering::Less) {
-                elements.push(elements2.next().unwrap());
-            }
-            if elements2.peek().map(|x| x.cmp(&elem)) == Some(Ordering::Equal) {
-                elements2.next();
-            }
-            elements.push(elem);
-        }
-
-        // Finish draining second list
-        elements.extend(elements2);
-
+        let elements = merge::merge_unique(self.elements, other.elements);
         Relation { elements }
     }
 
diff --git a/src/merge.rs b/src/merge.rs
@@ -0,0 +1,95 @@
+//! Subroutines for merging sorted lists efficiently.
+
+use std::cmp::Ordering;
+
+/// Merges two sorted lists into one sorted list; elements equal across both are emitted once.
+pub fn merge_unique<T: Ord>(mut a: Vec<T>, mut b: Vec<T>) -> Vec<T> {
+    // If either list is empty, the other one is already the merged result.
+    if a.is_empty() {
+        return b;
+    }
+    if b.is_empty() {
+        return a;
+    }
+
+    // Fast paths: when one list sorts entirely before the other, just concatenate them.
+    //
+    // The `unwrap` and `[0]` below cannot panic: both lists were checked to be non-empty above.
+    if *a.last().unwrap() < b[0] {
+        a.append(&mut b);
+        return a;
+    }
+    if *b.last().unwrap() < a[0] {
+        b.append(&mut a);
+        return b;
+    }
+
+    // Allocate `out` up front with room for every element of both inputs.
+    //
+    // SAFETY: Every `push_unchecked` call below relies on this capacity being sufficient.
+    let mut out = Vec::with_capacity(a.len() + b.len());
+
+    let mut a = a.into_iter();
+    let mut b = b.into_iter();
+
+    // While both inputs have elements remaining, copy the lesser element to the output vector.
+    while a.len() != 0 && b.len() != 0 {
+        // SAFETY: The following calls to `get_unchecked` and `next_unchecked` are safe because the
+        // loop condition guarantees that both `a` and `b` still hold at least one element here.
+        //
+        // I was hoping to avoid using "unchecked" operations, but it seems the bounds checks
+        // don't get optimized away. Using `ExactSizeIterator::is_empty` instead of checking `len`
+        // seemed to help, but that method is unstable.
+
+        let a_elem = unsafe { a.as_slice().get_unchecked(0) };
+        let b_elem = unsafe { b.as_slice().get_unchecked(0) };
+        match a_elem.cmp(b_elem) {
+            Ordering::Less => unsafe { push_unchecked(&mut out, next_unchecked(&mut a)) },
+            Ordering::Greater => unsafe { push_unchecked(&mut out, next_unchecked(&mut b)) },
+            Ordering::Equal => unsafe {
+                push_unchecked(&mut out, next_unchecked(&mut a));
+                std::mem::drop(next_unchecked(&mut b));
+            },
+        }
+    }
+
+    // Once either input runs out, copy the rest of the other one straight to the back of `out`.
+    // SAFETY: `out` was allocated with room for every input element, so each push below fits.
+    //
+    // This branch is free because the loop condition above already has to check `a.len() != 0`.
+    //
+    // Calling `push_unchecked` in a loop was slightly faster than `out.extend(...)`
+    // despite the fact that `std::vec::IntoIter` implements `TrustedLen`.
+    if a.len() != 0 {
+        for elem in a {
+            unsafe {
+                push_unchecked(&mut out, elem);
+            }
+        }
+    } else {
+        for elem in b {
+            unsafe {
+                push_unchecked(&mut out, elem);
+            }
+        }
+    }
+
+    out
+}
+
+/// Pushes `value` to `vec` without checking that the vector has sufficient capacity.
+///
+/// Safety: the caller must ensure `vec.len() < vec.capacity()`; otherwise calling this is UB.
+unsafe fn push_unchecked<T>(vec: &mut Vec<T>, value: T) {
+    let end = vec.as_mut_ptr().add(vec.len());
+    std::ptr::write(end, value);
+    vec.set_len(vec.len() + 1);
+}
+
+/// Like `iter.next().unwrap()`, but calling it when `iter` is empty is UB instead of a panic.
+unsafe fn next_unchecked<T>(iter: &mut std::vec::IntoIter<T>) -> T {
+    match iter.next() {
+        Some(x) => x,
+        None => std::hint::unreachable_unchecked(),
+    }
+}