Skip to content

Commit 150347d

Browse files
committed
Auto merge of #106343 - the8472:slice-iter-fold, r=scottmcm
optimize slice::Iter::fold

Fixes 2 of 4 cases from #106288

```
OLD: test slice::fold_to_last ... bench: 248 ns/iter (+/- 3)
NEW: test slice::fold_to_last ... bench: 0 ns/iter (+/- 0)
```
2 parents 83e727b + ebee2ff commit 150347d

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

core/benches/slice.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use core::ptr::NonNull;
12
use test::black_box;
23
use test::Bencher;
34

@@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) {
162163
black_box(slice.fill(black_box(NewType(42))));
163164
});
164165
}
166+
167+
// Tests the ability of the compiler to recognize that only the last slice item is needed
168+
// based on issue #106288
169+
#[bench]
170+
fn fold_to_last(b: &mut Bencher) {
171+
let slice: &[i32] = &[0; 1024];
172+
// The closure ignores the accumulator (`_`) and keeps only a pointer to the current item,
// so the fold's final value depends solely on the last element visited. The slice is passed
// through `black_box` before iterating so the optimizer cannot treat it as a known constant.
b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
173+
}

core/src/slice/iter/macros.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,39 @@ macro_rules! iterator {
191191
self.next_back()
192192
}
193193

194+
// Folds the items into an accumulator: starting from `init`, calls `f(acc, item)` once per
// item in increasing offset order from `self.ptr`, and returns the final accumulator.
#[inline]
195+
fn fold<B, F>(self, init: B, mut f: F) -> B
196+
where
197+
F: FnMut(B, Self::Item) -> B,
198+
{
199+
// this implementation consists of the following optimizations compared to the
200+
// default implementation:
201+
// - do-while loop, as is llvm's preferred loop shape,
202+
// see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
203+
// - bumps an index instead of a pointer since the latter case inhibits
204+
// some optimizations, see #111603
205+
// - avoids Option wrapping/matching
206+
// The loop below runs its body before testing the exit condition, so the empty case
// has to be handled up front; otherwise the first access would happen with no items.
if is_empty!(self) {
207+
return init;
208+
}
209+
let mut acc = init;
210+
let mut i = 0;
211+
// NOTE(review): `len!` and `is_empty!` are macros defined outside this view; presumably
// `len!(self)` yields the number of remaining items - confirm against their definitions.
let len = len!(self);
212+
loop {
213+
// SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
214+
// the slice allocation
215+
acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
216+
// SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
217+
// slice had that length, in which case we'll break out of the loop
218+
// after the increment
219+
i = unsafe { i.unchecked_add(1) };
220+
// Exit test placed after the body: this is the do-while shape described above.
if i == len {
221+
break;
222+
}
223+
}
224+
acc
225+
}
226+
194227
// We override the default implementation, which uses `try_fold`,
195228
// because this simple implementation generates less LLVM IR and is
196229
// faster to compile.

0 commit comments

Comments
 (0)