Skip to content

Commit 1d2ea98

Browse files
committed
Auto merge of #95837 - scottmcm:ptr-offset-from-unsigned, r=oli-obk
Add `sub_ptr` on pointers (the `usize` version of `offset_from`) We have `add`/`sub` which are the `usize` versions of `offset`, this adds the `usize` equivalent of `offset_from`. Like how `.add(d)` replaced a whole bunch of `.offset(d as isize)`, you can see from the changes here that it's fairly common that code actually knows the order between the pointers and *wants* a `usize`, not an `isize`. As a bonus, this can do `sub nuw`+`udiv exact`, rather than `sub`+`sdiv exact`, which can be optimized slightly better because it doesn't have to worry about negatives. That's why the slice iterators weren't using `offset_from`, though I haven't updated that code in this PR because slices are so perf-critical that I'll do it as its own change. This is an intrinsic, like `offset_from`, so that it can eventually be allowed in CTFE. It also allows checking the extra safety condition -- see the test confirming that CTFE catches it if you pass the pointers in the wrong order.
2 parents 0cd939e + 003b954 commit 1d2ea98

File tree

20 files changed

+307
-28
lines changed

20 files changed

+307
-28
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -713,14 +713,21 @@ fn codegen_regular_intrinsic_call<'tcx>(
713713
ret.write_cvalue(fx, val);
714714
};
715715

716-
ptr_offset_from, (v ptr, v base) {
716+
ptr_offset_from | ptr_offset_from_unsigned, (v ptr, v base) {
717717
let ty = substs.type_at(0);
718718
let isize_layout = fx.layout_of(fx.tcx.types.isize);
719719

720720
let pointee_size: u64 = fx.layout_of(ty).size.bytes();
721-
let diff = fx.bcx.ins().isub(ptr, base);
721+
let diff_bytes = fx.bcx.ins().isub(ptr, base);
722722
// FIXME this can be an exact division.
723-
let val = CValue::by_val(fx.bcx.ins().sdiv_imm(diff, pointee_size as i64), isize_layout);
723+
let diff = if intrinsic == sym::ptr_offset_from_unsigned {
724+
// Because diff_bytes ULE isize::MAX, this would be fine as signed,
725+
// but unsigned is slightly easier to codegen, so might as well.
726+
fx.bcx.ins().udiv_imm(diff_bytes, pointee_size as i64)
727+
} else {
728+
fx.bcx.ins().sdiv_imm(diff_bytes, pointee_size as i64)
729+
};
730+
let val = CValue::by_val(diff, isize_layout);
724731
ret.write_cvalue(fx, val);
725732
};
726733

compiler/rustc_codegen_ssa/src/mir/intrinsic.rs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -555,21 +555,28 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
555555
}
556556
}
557557

558-
sym::ptr_offset_from => {
558+
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
559559
let ty = substs.type_at(0);
560560
let pointee_size = bx.layout_of(ty).size;
561561

562-
// This is the same sequence that Clang emits for pointer subtraction.
563-
// It can be neither `nsw` nor `nuw` because the input is treated as
564-
// unsigned but then the output is treated as signed, so neither works.
565562
let a = args[0].immediate();
566563
let b = args[1].immediate();
567564
let a = bx.ptrtoint(a, bx.type_isize());
568565
let b = bx.ptrtoint(b, bx.type_isize());
569-
let d = bx.sub(a, b);
570566
let pointee_size = bx.const_usize(pointee_size.bytes());
571-
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
572-
bx.exactsdiv(d, pointee_size)
567+
if name == sym::ptr_offset_from {
568+
// This is the same sequence that Clang emits for pointer subtraction.
569+
// It can be neither `nsw` nor `nuw` because the input is treated as
570+
// unsigned but then the output is treated as signed, so neither works.
571+
let d = bx.sub(a, b);
572+
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
573+
bx.exactsdiv(d, pointee_size)
574+
} else {
575+
// The `_unsigned` version knows the relative ordering of the pointers,
576+
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
577+
let d = bx.unchecked_usub(a, b);
578+
bx.exactudiv(d, pointee_size)
579+
}
573580
}
574581

575582
_ => {

compiler/rustc_const_eval/src/interpret/intrinsics.rs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
308308
let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, self);
309309
self.write_pointer(offset_ptr, dest)?;
310310
}
311-
sym::ptr_offset_from => {
311+
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
312312
let a = self.read_pointer(&args[0])?;
313313
let b = self.read_pointer(&args[1])?;
314314

@@ -330,8 +330,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
330330
// Both are pointers. They must be into the same allocation.
331331
if a_alloc_id != b_alloc_id {
332332
throw_ub_format!(
333-
"ptr_offset_from cannot compute offset of pointers into different \
334-
allocations.",
333+
"{} cannot compute offset of pointers into different allocations.",
334+
intrinsic_name,
335335
);
336336
}
337337
// And they must both be valid for zero-sized accesses ("in-bounds or one past the end").
@@ -348,16 +348,39 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
348348
CheckInAllocMsg::OffsetFromTest,
349349
)?;
350350

351+
if intrinsic_name == sym::ptr_offset_from_unsigned && a_offset < b_offset {
352+
throw_ub_format!(
353+
"{} cannot compute a negative offset, but {} < {}",
354+
intrinsic_name,
355+
a_offset.bytes(),
356+
b_offset.bytes(),
357+
);
358+
}
359+
351360
// Compute offset.
352361
let usize_layout = self.layout_of(self.tcx.types.usize)?;
353362
let isize_layout = self.layout_of(self.tcx.types.isize)?;
354-
let a_offset = ImmTy::from_uint(a_offset.bytes(), usize_layout);
355-
let b_offset = ImmTy::from_uint(b_offset.bytes(), usize_layout);
356-
let (val, _overflowed, _ty) =
363+
let ret_layout = if intrinsic_name == sym::ptr_offset_from {
364+
isize_layout
365+
} else {
366+
usize_layout
367+
};
368+
369+
// The subtraction is always done in `isize` to enforce
370+
// the "no more than `isize::MAX` apart" requirement.
371+
let a_offset = ImmTy::from_uint(a_offset.bytes(), isize_layout);
372+
let b_offset = ImmTy::from_uint(b_offset.bytes(), isize_layout);
373+
let (val, overflowed, _ty) =
357374
self.overflowing_binary_op(BinOp::Sub, &a_offset, &b_offset)?;
375+
if overflowed {
376+
throw_ub_format!("Pointers were too far apart for {}", intrinsic_name);
377+
}
378+
358379
let pointee_layout = self.layout_of(substs.type_at(0))?;
359-
let val = ImmTy::from_scalar(val, isize_layout);
360-
let size = ImmTy::from_int(pointee_layout.size.bytes(), isize_layout);
380+
// This re-interprets an isize at ret_layout, but we already checked
381+
// that if ret_layout is usize, then the result must be non-negative.
382+
let val = ImmTy::from_scalar(val, ret_layout);
383+
let size = ImmTy::from_int(pointee_layout.size.bytes(), ret_layout);
361384
self.exact_div(&val, &size, dest)?;
362385
}
363386
}

compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,6 +1079,7 @@ symbols! {
10791079
ptr_null,
10801080
ptr_null_mut,
10811081
ptr_offset_from,
1082+
ptr_offset_from_unsigned,
10821083
pub_macro_rules,
10831084
pub_restricted,
10841085
pure,

compiler/rustc_typeck/src/check/intrinsic.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,9 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
305305
sym::ptr_offset_from => {
306306
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.isize)
307307
}
308+
sym::ptr_offset_from_unsigned => {
309+
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.usize)
310+
}
308311
sym::unchecked_div | sym::unchecked_rem | sym::exact_div => {
309312
(1, vec![param(0), param(0)], param(0))
310313
}

library/alloc/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
#![feature(pattern)]
128128
#![feature(ptr_internals)]
129129
#![feature(ptr_metadata)]
130+
#![feature(ptr_sub_ptr)]
130131
#![feature(receiver_trait)]
131132
#![feature(set_ptr_value)]
132133
#![feature(slice_group_by)]

library/alloc/src/slice.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1056,7 +1056,7 @@ where
10561056
fn drop(&mut self) {
10571057
// `T` is not a zero-sized type, and these are pointers into a slice's elements.
10581058
unsafe {
1059-
let len = self.end.offset_from(self.start) as usize;
1059+
let len = self.end.sub_ptr(self.start);
10601060
ptr::copy_nonoverlapping(self.start, self.dest, len);
10611061
}
10621062
}

library/alloc/src/vec/drain.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> {
163163
// it from the original vec but also avoid creating a &mut to the front since that could
164164
// invalidate raw pointers to it which some unsafe code might rely on.
165165
let vec_ptr = vec.as_mut().as_mut_ptr();
166-
let drop_offset = drop_ptr.offset_from(vec_ptr) as usize;
166+
let drop_offset = drop_ptr.sub_ptr(vec_ptr);
167167
let to_drop = ptr::slice_from_raw_parts_mut(vec_ptr.add(drop_offset), drop_len);
168168
ptr::drop_in_place(to_drop);
169169
}

library/alloc/src/vec/in_place_collect.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ where
250250
let sink =
251251
self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
252252
// iteration succeeded, don't drop head
253-
unsafe { ManuallyDrop::new(sink).dst.offset_from(dst_buf) as usize }
253+
unsafe { ManuallyDrop::new(sink).dst.sub_ptr(dst_buf) }
254254
}
255255
}
256256

library/alloc/src/vec/in_place_drop.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub(super) struct InPlaceDrop<T> {
1010

1111
impl<T> InPlaceDrop<T> {
1212
fn len(&self) -> usize {
13-
unsafe { self.dst.offset_from(self.inner) as usize }
13+
unsafe { self.dst.sub_ptr(self.inner) }
1414
}
1515
}
1616

0 commit comments

Comments
 (0)