
[AArch64][PAC] Skip llvm.ptrauth.blend intrinsic in GVN PRE #147815


Open

wants to merge 2 commits into base: users/atrosinenko/pauth-imm-modifier-other

Conversation

atrosinenko
Contributor

The instruction selector on AArch64 implements a best-effort heuristic
to detect a discriminator computed by the llvm.ptrauth.blend intrinsic.
If such a pattern is detected, the address and immediate discriminator
components are emitted as two separate operands of the corresponding
pseudo instruction, which is not expanded until AsmPrinter. This helps
enforce the hard-coded immediate modifier even when the address part of
the discriminator can be modified by an attacker, producing code along
the lines of:
    mov     x8, x20
    movk    x8, #1234, lsl #48
    pacda   x0, x8
    // ...
    bl      callee
    mov     x8, x20            // address in x20 can be modified
    movk    x8, #1234, lsl #48 // immediate modifier is enforced
    pacda   x0, x8

instead of reloading a previously computed discriminator value from the
stack (which can be modified by an attacker under the Pointer
Authentication threat model) or keeping it in a callee-saved register
(which may be spilled to the stack in the callee):

    movk    x20, #1234, lsl #48
    pacda   x0, x20
    // ...
    bl      callee
    pacda   x0, x20         // the entire discriminator can be modified
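
For illustration, here is a minimal IR sketch of the shape the selector
tries to match (the function and value names are hypothetical): a call to
llvm.ptrauth.blend on an address and a small constant, feeding the
discriminator operand of a ptrauth.* intrinsic such as llvm.ptrauth.sign.

    declare i64 @llvm.ptrauth.blend(i64, i64)
    declare i64 @llvm.ptrauth.sign(i64, i32, i64)

    define i64 @sign_with_blend(ptr %obj, i64 %value) {
      %addr = ptrtoint ptr %obj to i64
      ; A blend of an address and a 16-bit constant is what the selector
      ; recognizes; 1234 corresponds to the movk immediate above, key 2 is DA.
      %discr = call i64 @llvm.ptrauth.blend(i64 %addr, i64 1234)
      %signed = call i64 @llvm.ptrauth.sign(i64 %value, i32 2, i64 %discr)
      ret i64 %signed
    }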

@llvmbot
Member

llvmbot commented Jul 9, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-transforms

Author: Anatoly Trosinenko (atrosinenko)


Full diff: https://github.com/llvm/llvm-project/pull/147815.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/GVN.cpp (+7)
  • (added) llvm/test/CodeGen/AArch64/ptrauth-discriminator-components.ll (+164)
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index d9d05c3e8cc49..ac96dc3b7cf26 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2990,6 +2990,13 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
   if (isa<GetElementPtrInst>(CurInst))
     return false;
 
+  // Don't do PRE on the ptrauth_blend intrinsic: on AArch64, the instruction
+  // selector wants to take its operands into account when selecting the user
+  // of the blended discriminator, so don't hide the blend behind PHI nodes.
+  if (auto *II = dyn_cast<IntrinsicInst>(CurInst))
+    if (II->getIntrinsicID() == Intrinsic::ptrauth_blend)
+      return false;
+
   if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
     // We don't currently value number ANY inline asm calls.
     if (CallB->isInlineAsm())
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-discriminator-components.ll b/llvm/test/CodeGen/AArch64/ptrauth-discriminator-components.ll
new file mode 100644
index 0000000000000..1987f58b27c72
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-discriminator-components.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple aarch64 -S -passes=gvn                                   | FileCheck --check-prefix=GVN %s
+; RUN: opt < %s -mtriple aarch64 -S -passes='gvn,simplifycfg<hoist-common-insts>' | FileCheck --check-prefix=GVN-SCFG %s
+
+; When processing ptrauth.* intrinsics accepting a discriminator operand
+; on AArch64, the instruction selector tries to detect a common pattern of
+; the discriminator value being computed by a call to `blend(addr, imm)`.
+; In such a case, a pseudo instruction is generated with `addr` and `imm` as
+; separate operands, which is not expanded until AsmPrinter. This way, it is
+; possible to enforce the immediate modifier, even if an attacker is able to
+; substitute the address modifier.
+;
+; While it would be more robust to use two separate arguments per discriminator
+; in the relevant intrinsics, best-effort matching is currently performed by
+; the instruction selector. For that reason, it is important not to introduce
+; PHI nodes hiding the results of multiple identical blend operations.
+
+; In test_simple, four different signed values are stored into memory, but
+; the discriminators are pairwise equal and are thus candidates for GVN's
+; partial redundancy elimination.
+define void @test_simple(i1 %cond, ptr %storage1, ptr %storage2, i64 %a, i64 %b) {
+; GVN-LABEL: define void @test_simple(
+; GVN-SAME: i1 [[COND:%.*]], ptr [[STORAGE1:%.*]], ptr [[STORAGE2:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; GVN-NEXT:  [[ENTRY:.*:]]
+; GVN-NEXT:    [[STORAGE1_I:%.*]] = ptrtoint ptr [[STORAGE1]] to i64
+; GVN-NEXT:    [[STORAGE2_I:%.*]] = ptrtoint ptr [[STORAGE2]] to i64
+; GVN-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[EXIT:.*]]
+; GVN:       [[IF_THEN]]:
+; GVN-NEXT:    [[DISCR1_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-NEXT:    [[DISCR2_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-NEXT:    [[T1:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR1_THEN]])
+; GVN-NEXT:    [[T2:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR2_THEN]])
+; GVN-NEXT:    store volatile i64 [[T1]], ptr [[STORAGE1]], align 8
+; GVN-NEXT:    store volatile i64 [[T2]], ptr [[STORAGE2]], align 8
+; GVN-NEXT:    br label %[[EXIT]]
+; GVN:       [[EXIT]]:
+; GVN-NEXT:    [[DISCR1_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-NEXT:    [[DISCR2_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-NEXT:    [[T3:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR1_EXIT]])
+; GVN-NEXT:    [[T4:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR2_EXIT]])
+; GVN-NEXT:    store volatile i64 [[T3]], ptr [[STORAGE1]], align 8
+; GVN-NEXT:    store volatile i64 [[T4]], ptr [[STORAGE2]], align 8
+; GVN-NEXT:    ret void
+;
+; GVN-SCFG-LABEL: define void @test_simple(
+; GVN-SCFG-SAME: i1 [[COND:%.*]], ptr [[STORAGE1:%.*]], ptr [[STORAGE2:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; GVN-SCFG-NEXT:  [[ENTRY:.*:]]
+; GVN-SCFG-NEXT:    [[STORAGE1_I:%.*]] = ptrtoint ptr [[STORAGE1]] to i64
+; GVN-SCFG-NEXT:    [[STORAGE2_I:%.*]] = ptrtoint ptr [[STORAGE2]] to i64
+; GVN-SCFG-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[EXIT:.*]]
+; GVN-SCFG:       [[IF_THEN]]:
+; GVN-SCFG-NEXT:    [[DISCR1_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-SCFG-NEXT:    [[DISCR2_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-SCFG-NEXT:    [[T1:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR1_THEN]])
+; GVN-SCFG-NEXT:    [[T2:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR2_THEN]])
+; GVN-SCFG-NEXT:    store volatile i64 [[T1]], ptr [[STORAGE1]], align 8
+; GVN-SCFG-NEXT:    store volatile i64 [[T2]], ptr [[STORAGE2]], align 8
+; GVN-SCFG-NEXT:    br label %[[EXIT]]
+; GVN-SCFG:       [[EXIT]]:
+; GVN-SCFG-NEXT:    [[DISCR1_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-SCFG-NEXT:    [[DISCR2_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-SCFG-NEXT:    [[T3:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR1_EXIT]])
+; GVN-SCFG-NEXT:    [[T4:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR2_EXIT]])
+; GVN-SCFG-NEXT:    store volatile i64 [[T3]], ptr [[STORAGE1]], align 8
+; GVN-SCFG-NEXT:    store volatile i64 [[T4]], ptr [[STORAGE2]], align 8
+; GVN-SCFG-NEXT:    ret void
+;
+entry:
+  %storage1.i = ptrtoint ptr %storage1 to i64
+  %storage2.i = ptrtoint ptr %storage2 to i64
+  br i1 %cond, label %if.then, label %exit
+
+if.then:
+  %discr1.then = call i64 @llvm.ptrauth.blend(i64 %storage1.i, i64 42)
+  %discr2.then = call i64 @llvm.ptrauth.blend(i64 %storage2.i, i64 42)
+  %t1 = call i64 @llvm.ptrauth.sign(i64 %a, i32 2, i64 %discr1.then)
+  %t2 = call i64 @llvm.ptrauth.sign(i64 %a, i32 2, i64 %discr2.then)
+  store volatile i64 %t1, ptr %storage1
+  store volatile i64 %t2, ptr %storage2
+  br label %exit
+
+exit:
+  %discr1.exit = call i64 @llvm.ptrauth.blend(i64 %storage1.i, i64 42)
+  %discr2.exit = call i64 @llvm.ptrauth.blend(i64 %storage2.i, i64 42)
+  %t3 = call i64 @llvm.ptrauth.sign(i64 %b, i32 2, i64 %discr1.exit)
+  %t4 = call i64 @llvm.ptrauth.sign(i64 %b, i32 2, i64 %discr2.exit)
+  store volatile i64 %t3, ptr %storage1
+  store volatile i64 %t4, ptr %storage2
+  ret void
+}
+
+; test_interleaved is similar to test_simple, but interleaving the blend and
+; sign operations makes it harder for the SimplifyCFG pass to hoist the blends
+; into the entry basic block later and thus eliminate the PHI nodes.
+define void @test_interleaved(i1 %cond, ptr %storage1, ptr %storage2, i64 %a, i64 %b) {
+; GVN-LABEL: define void @test_interleaved(
+; GVN-SAME: i1 [[COND:%.*]], ptr [[STORAGE1:%.*]], ptr [[STORAGE2:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; GVN-NEXT:  [[ENTRY:.*:]]
+; GVN-NEXT:    [[STORAGE1_I:%.*]] = ptrtoint ptr [[STORAGE1]] to i64
+; GVN-NEXT:    [[STORAGE2_I:%.*]] = ptrtoint ptr [[STORAGE2]] to i64
+; GVN-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[EXIT:.*]]
+; GVN:       [[IF_THEN]]:
+; GVN-NEXT:    [[DISCR1_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-NEXT:    [[T1:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR1_THEN]])
+; GVN-NEXT:    [[DISCR2_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-NEXT:    [[T2:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR2_THEN]])
+; GVN-NEXT:    store volatile i64 [[T1]], ptr [[STORAGE1]], align 8
+; GVN-NEXT:    store volatile i64 [[T2]], ptr [[STORAGE2]], align 8
+; GVN-NEXT:    br label %[[EXIT]]
+; GVN:       [[EXIT]]:
+; GVN-NEXT:    [[DISCR1_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-NEXT:    [[T3:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR1_EXIT]])
+; GVN-NEXT:    [[DISCR2_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-NEXT:    [[T4:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR2_EXIT]])
+; GVN-NEXT:    store volatile i64 [[T3]], ptr [[STORAGE1]], align 8
+; GVN-NEXT:    store volatile i64 [[T4]], ptr [[STORAGE2]], align 8
+; GVN-NEXT:    ret void
+;
+; GVN-SCFG-LABEL: define void @test_interleaved(
+; GVN-SCFG-SAME: i1 [[COND:%.*]], ptr [[STORAGE1:%.*]], ptr [[STORAGE2:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; GVN-SCFG-NEXT:  [[ENTRY:.*:]]
+; GVN-SCFG-NEXT:    [[STORAGE1_I:%.*]] = ptrtoint ptr [[STORAGE1]] to i64
+; GVN-SCFG-NEXT:    [[STORAGE2_I:%.*]] = ptrtoint ptr [[STORAGE2]] to i64
+; GVN-SCFG-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[EXIT:.*]]
+; GVN-SCFG:       [[IF_THEN]]:
+; GVN-SCFG-NEXT:    [[DISCR1_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-SCFG-NEXT:    [[T1:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR1_THEN]])
+; GVN-SCFG-NEXT:    [[DISCR2_THEN:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-SCFG-NEXT:    [[T2:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[A]], i32 2, i64 [[DISCR2_THEN]])
+; GVN-SCFG-NEXT:    store volatile i64 [[T1]], ptr [[STORAGE1]], align 8
+; GVN-SCFG-NEXT:    store volatile i64 [[T2]], ptr [[STORAGE2]], align 8
+; GVN-SCFG-NEXT:    br label %[[EXIT]]
+; GVN-SCFG:       [[EXIT]]:
+; GVN-SCFG-NEXT:    [[DISCR1_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE1_I]], i64 42)
+; GVN-SCFG-NEXT:    [[T3:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR1_EXIT]])
+; GVN-SCFG-NEXT:    [[DISCR2_EXIT:%.*]] = call i64 @llvm.ptrauth.blend(i64 [[STORAGE2_I]], i64 42)
+; GVN-SCFG-NEXT:    [[T4:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[B]], i32 2, i64 [[DISCR2_EXIT]])
+; GVN-SCFG-NEXT:    store volatile i64 [[T3]], ptr [[STORAGE1]], align 8
+; GVN-SCFG-NEXT:    store volatile i64 [[T4]], ptr [[STORAGE2]], align 8
+; GVN-SCFG-NEXT:    ret void
+;
+entry:
+  %storage1.i = ptrtoint ptr %storage1 to i64
+  %storage2.i = ptrtoint ptr %storage2 to i64
+  br i1 %cond, label %if.then, label %exit
+
+if.then:
+  %discr1.then = call i64 @llvm.ptrauth.blend(i64 %storage1.i, i64 42)
+  %t1 = call i64 @llvm.ptrauth.sign(i64 %a, i32 2, i64 %discr1.then)
+  %discr2.then = call i64 @llvm.ptrauth.blend(i64 %storage2.i, i64 42)
+  %t2 = call i64 @llvm.ptrauth.sign(i64 %a, i32 2, i64 %discr2.then)
+  store volatile i64 %t1, ptr %storage1
+  store volatile i64 %t2, ptr %storage2
+  br label %exit
+
+exit:
+  %discr1.exit = call i64 @llvm.ptrauth.blend(i64 %storage1.i, i64 42)
+  %t3 = call i64 @llvm.ptrauth.sign(i64 %b, i32 2, i64 %discr1.exit)
+  %discr2.exit = call i64 @llvm.ptrauth.blend(i64 %storage2.i, i64 42)
+  %t4 = call i64 @llvm.ptrauth.sign(i64 %b, i32 2, i64 %discr2.exit)
+  store volatile i64 %t3, ptr %storage1
+  store volatile i64 %t4, ptr %storage2
+  ret void
+}
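
For context, here is a hypothetical sketch (names invented) of the shape
scalar PRE would otherwise create for code like test_simple: the blend in the
join block is replaced by a PHI over blends computed in the predecessors,
which hides the (address, immediate) pair from the instruction selector.

    define i64 @pre_hides_blend(i1 %cond, ptr %storage, i64 %a, i64 %b) {
    entry:
      %addr = ptrtoint ptr %storage to i64
      br i1 %cond, label %if.then, label %if.else

    if.then:
      %discr.then = call i64 @llvm.ptrauth.blend(i64 %addr, i64 42)
      %t1 = call i64 @llvm.ptrauth.sign(i64 %a, i32 2, i64 %discr.then)
      store volatile i64 %t1, ptr %storage
      br label %exit

    if.else:
      ; PRE inserts a copy of the blend here so that the blend which used to
      ; be in %exit becomes fully redundant ...
      %discr.pre = call i64 @llvm.ptrauth.blend(i64 %addr, i64 42)
      br label %exit

    exit:
      ; ... and replaces it with a PHI. The selector then sees an opaque
      ; discriminator it cannot split back into (%addr, 42), so the immediate
      ; modifier is no longer enforced in the emitted code.
      %discr = phi i64 [ %discr.then, %if.then ], [ %discr.pre, %if.else ]
      %t2 = call i64 @llvm.ptrauth.sign(i64 %b, i32 2, i64 %discr)
      ret i64 %t2
    }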
