Skip to content

Commit 7401672

Browse files
authored
[DSE] Update dereferenceable attributes when adjusting memintrinsic ptr (#125073)
Consider IR like this:

    call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p, i8 0, i64 28, i1 false)
    store i32 1, ptr %p

In the past it has been optimized like this:

    %p2 = getelementptr inbounds i8, ptr %p, i64 4
    call void @llvm.memset.p0.i64(ptr dereferenceable(28) %p2, i8 0, i64 24, i1 false)
    store i32 1, ptr %p

As the input IR doesn't guarantee that it is OK to dereference 28 bytes starting at the adjusted pointer %p2, the transformation has been a bit flawed.

With this patch we make sure to drop any dereferenceable/dereferenceable_or_null attributes when doing such transforms. An alternative would have been to adjust the number of dereferenceable bytes, but since a memset with a constant length already implies dereferenceability by itself, it is simpler to just drop the attributes.

The new filtering of attributes is done using a helper that only keeps attributes that we explicitly handle. For the adjusted mem intrinsic pointers, that currently involves "NonNull", "NoUndef" and "Alignment" (when the alignment is known to be fulfilled also after offsetting the pointer).

Fixes #115976
1 parent bfcec19 commit 7401672

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include "llvm/Analysis/TargetLibraryInfo.h"
5151
#include "llvm/Analysis/ValueTracking.h"
5252
#include "llvm/IR/Argument.h"
53+
#include "llvm/IR/AttributeMask.h"
5354
#include "llvm/IR/BasicBlock.h"
5455
#include "llvm/IR/Constant.h"
5556
#include "llvm/IR/ConstantRangeList.h"
@@ -563,6 +564,43 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
563564
for_each(LinkedDVRAssigns, InsertAssignForOverlap);
564565
}
565566

567+
/// Update the attributes of argument \p ArgNo of \p Intrinsic given that the
568+
/// dereferenced pointer has been moved forward by \p PtrOffset (as happens
569+
/// when shortening a mem intrinsic). Only explicitly handled attributes are kept.
570+
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
571+
uint64_t PtrOffset) {
572+
// Snapshot the attributes currently attached to the argument.
573+
AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
574+
575+
// Walk the old attributes; anything not explicitly kept below is removed.
576+
AttributeMask AttrsToRemove;
577+
for (auto &Attr : OldAttrs) {
578+
if (Attr.hasKindAsEnum()) {
579+
switch (Attr.getKindAsEnum()) {
580+
default:
581+
break;
582+
case Attribute::Alignment:
583+
// Only keep the alignment if PtrOffset satisfies it.
584+
if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
585+
continue;
586+
break;
587+
case Attribute::Dereferenceable:
588+
case Attribute::DereferenceableOrNull:
589+
// We could reduce the size of these attributes according to
590+
// PtrOffset, but for now we simply drop them.
591+
break;
592+
case Attribute::NonNull:
593+
case Attribute::NoUndef:
594+
continue;
595+
}
596+
}
597+
AttrsToRemove.addAttribute(Attr);
598+
}
599+
600+
// Strip the collected attributes from the argument.
601+
Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
602+
}
603+
566604
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
567605
uint64_t &DeadSize, int64_t KillingStart,
568606
uint64_t KillingSize, bool IsOverwriteEnd) {
@@ -644,6 +682,7 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
644682
DeadI->getIterator());
645683
NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
646684
DeadIntrinsic->setDest(NewDestGEP);
685+
adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
647686
}
648687

649688
// Update attached dbg.assign intrinsics. Assume 8-bit byte.

llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,33 @@ entry:
402402
store i64 1, ptr %p, align 1
403403
ret void
404404
}
405+
406+
; Verify that the dereferenceable attribute is dropped when the memset pointer is offset.
407+
define void @dereferenceable(ptr nocapture %p) {
408+
; CHECK-LABEL: @dereferenceable(
409+
; CHECK-NEXT: entry:
410+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
411+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
412+
; CHECK-NEXT: store i32 1, ptr [[P]], align 4
413+
; CHECK-NEXT: ret void
414+
;
415+
entry:
416+
call void @llvm.memset.p0.i64(ptr dereferenceable(28) align 4 %p, i8 0, i64 28, i1 false)
417+
store i32 1, ptr %p, align 4
418+
ret void
419+
}
420+
421+
; Verify that the dereferenceable_or_null attribute is dropped when the memset pointer is offset.
422+
define void @dereferenceable_or_null(ptr nocapture %p) {
423+
; CHECK-LABEL: @dereferenceable_or_null(
424+
; CHECK-NEXT: entry:
425+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
426+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 20, i1 false)
427+
; CHECK-NEXT: store i64 1, ptr [[P]], align 4
428+
; CHECK-NEXT: ret void
429+
;
430+
entry:
431+
call void @llvm.memset.p0.i64(ptr dereferenceable_or_null(28) align 4 %p, i8 0, i64 28, i1 false)
432+
store i64 1, ptr %p, align 4
433+
ret void
434+
}

0 commit comments

Comments
 (0)