|
| 1 | +//! Subroutines for merging sorted lists efficiently. |
| 2 | +
|
| 3 | +use std::cmp::Ordering; |
| 4 | + |
| 5 | +/// Merges two sorted lists into a single sorted list, ignoring duplicates. |
| 6 | +pub fn merge_unique<T: Ord>(mut a: Vec<T>, mut b: Vec<T>) -> Vec<T> { |
| 7 | + // If one of the lists is zero-length, we don't need to do any work. |
| 8 | + if a.is_empty() { |
| 9 | + return b; |
| 10 | + } |
| 11 | + if b.is_empty() { |
| 12 | + return a; |
| 13 | + } |
| 14 | + |
| 15 | + // Fast path for when all the new elements are after the existing ones. |
| 16 | + // |
| 17 | + // Cannot panic because we check for empty inputs above. |
| 18 | + if *a.last().unwrap() < b[0] { |
| 19 | + a.append(&mut b); |
| 20 | + return a; |
| 21 | + } |
| 22 | + if *b.last().unwrap() < a[0] { |
| 23 | + b.append(&mut a); |
| 24 | + return b; |
| 25 | + } |
| 26 | + |
| 27 | + // Ensure that `out` always has sufficient capacity. |
| 28 | + // |
| 29 | + // SAFETY: The calls to `push_unchecked` below are safe because of this. |
| 30 | + let mut out = Vec::with_capacity(a.len() + b.len()); |
| 31 | + |
| 32 | + let mut a = a.into_iter(); |
| 33 | + let mut b = b.into_iter(); |
| 34 | + |
| 35 | + // While both inputs have elements remaining, copy the lesser element to the output vector. |
| 36 | + while a.len() != 0 && b.len() != 0 { |
| 37 | + // SAFETY: The following calls to `get_unchecked` and `next_unchecked` are safe because we |
| 38 | + // ensure that `a.len() > 0` and `b.len() > 0` inside the loop. |
| 39 | + // |
| 40 | + // I was hoping to avoid using "unchecked" operations, but it seems the bounds checks |
| 41 | + // don't get optimized away. Using `ExactSizeIterator::is_empty` instead of checking `len` |
| 42 | + // seemed to help, but that method is unstable. |
| 43 | + |
| 44 | + let a_elem = unsafe { a.as_slice().get_unchecked(0) }; |
| 45 | + let b_elem = unsafe { b.as_slice().get_unchecked(0) }; |
| 46 | + match a_elem.cmp(b_elem) { |
| 47 | + Ordering::Less => unsafe { push_unchecked(&mut out, next_unchecked(&mut a)) }, |
| 48 | + Ordering::Greater => unsafe { push_unchecked(&mut out, next_unchecked(&mut b)) }, |
| 49 | + Ordering::Equal => unsafe { |
| 50 | + push_unchecked(&mut out, next_unchecked(&mut a)); |
| 51 | + std::mem::drop(next_unchecked(&mut b)); |
| 52 | + }, |
| 53 | + } |
| 54 | + } |
| 55 | + |
| 56 | + // Once either `a` or `b` runs out of elements, copy all remaining elements in the other one |
| 57 | + // directly to the back of the output list. |
| 58 | + // |
| 59 | + // This branch is free because we have to check `a.is_empty()` above anyways. |
| 60 | + // |
| 61 | + // Calling `push_unchecked` in a loop was slightly faster than `out.extend(...)` |
| 62 | + // despite the fact that `std::vec::IntoIter` implements `TrustedLen`. |
| 63 | + if a.len() != 0 { |
| 64 | + for elem in a { |
| 65 | + unsafe { |
| 66 | + push_unchecked(&mut out, elem); |
| 67 | + } |
| 68 | + } |
| 69 | + } else { |
| 70 | + for elem in b { |
| 71 | + unsafe { |
| 72 | + push_unchecked(&mut out, elem); |
| 73 | + } |
| 74 | + } |
| 75 | + } |
| 76 | + |
| 77 | + out |
| 78 | +} |
| 79 | + |
/// Appends `value` to the end of `vec` without checking for, or growing, spare capacity.
///
/// # Safety
///
/// The caller must guarantee that `vec.len() < vec.capacity()`. Calling this function when
/// `vec.len() == vec.capacity()` writes past the end of the allocation, which is undefined
/// behaviour. Builds with debug assertions enabled check this precondition and panic instead.
unsafe fn push_unchecked<T>(vec: &mut Vec<T>, value: T) {
    let len = vec.len();
    debug_assert!(
        len < vec.capacity(),
        "push_unchecked called on a vector with no spare capacity"
    );

    // Move `value` into the first slot past the current length. `ptr::write` neither reads nor
    // drops whatever was previously in that (uninitialized) slot.
    let end = vec.as_mut_ptr().add(len);
    std::ptr::write(end, value);

    // Only grow the length once the slot has been initialized.
    vec.set_len(len + 1);
}
| 88 | + |
/// Takes the next element out of `iter`, assuming without checking that one exists.
///
/// Behaves like `iter.next().unwrap()` with the `None` branch declared unreachable.
///
/// # Safety
///
/// The caller must guarantee that `iter` is not empty. Calling this function on an exhausted
/// iterator is undefined behaviour.
unsafe fn next_unchecked<T>(iter: &mut std::vec::IntoIter<T>) -> T {
    if let Some(item) = iter.next() {
        item
    } else {
        // The caller has promised that the iterator still holds an element, so this branch can
        // never be taken.
        std::hint::unreachable_unchecked()
    }
}
0 commit comments