Skip to content

Commit 4d7f9b7

Browse files
committed
X86: Don't fold TEST into ADD ...@GOTTPOFF/GOTNTPOFF/INDNTPOFF
The linker may convert such an ADD into a LEA, so we must not use the EFLAGS output. This causes miscompiles with -fsanitize=null after bacdf80 added llvm.threadlocal.address -- previously, global variables were known to be non-null, but the intrinsic is not currently known to return nonnull. (That should be corrected, but it should not have caused miscompiles!) Differential Revision: https://reviews.llvm.org/D131716
1 parent 8a3481b commit 4d7f9b7

File tree

3 files changed

+179
-0
lines changed

3 files changed

+179
-0
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4082,6 +4082,17 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
40824082
NoSignFlag = false;
40834083
ClearsOverflowFlag = false;
40844084

4085+
// "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF, and
4086+
// i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA during
4087+
// Initial Exec to Local Exec relaxation. In these cases, we must not depend
4088+
// on the EFLAGS modification of ADD actually happening in the final binary.
4089+
if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
4090+
unsigned Flags = MI.getOperand(5).getTargetFlags();
4091+
if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
4092+
Flags == X86II::MO_GOTNTPOFF)
4093+
return false;
4094+
}
4095+
40854096
switch (MI.getOpcode()) {
40864097
default: return false;
40874098

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -run-pass=peephole-opt -mtriple=i386-- %s -o - | FileCheck %s
3+
4+
# Linkers may change `addl xx@GOTNTPOFF, %reg` to `leal OFFSET(%reg), %reg`,
5+
# so we must not depend upon the EFLAGS output. Verify that the TEST
6+
# instruction won't be folded into the ADD.
7+
8+
# NOTE: the IR will no longer actually produce the input MIR after
9+
# llvm.threadlocal.address intrinsic is annotated as having a nonnull
10+
# result.
11+
12+
# NOTE2: the foo_nopic MIR was produced from IR with --relocation-model=static
13+
# while foo_pic's MIR was produced with --relocation-model=pic.
14+
15+
--- |
16+
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
17+
target triple = "i386-unknown-linux-gnu"
18+
19+
@x = external thread_local(initialexec) global i32, align 4
20+
21+
define i32 @foo_nopic() {
22+
%1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
23+
%cmp = icmp eq ptr %1, null
24+
%zext = zext i1 %cmp to i32
25+
ret i32 %zext
26+
}
27+
28+
define i32 @foo_pic() {
29+
%1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
30+
%cmp = icmp eq ptr %1, null
31+
%zext = zext i1 %cmp to i32
32+
ret i32 %zext
33+
}
34+
35+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
36+
declare ptr @llvm.threadlocal.address.p0(ptr) #0
37+
38+
attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
39+
40+
...
41+
---
42+
name: foo_nopic
43+
alignment: 16
44+
tracksRegLiveness: true
45+
registers:
46+
- { id: 0, class: gr32 }
47+
- { id: 1, class: gr32 }
48+
- { id: 2, class: gr8 }
49+
- { id: 3, class: gr32 }
50+
frameInfo:
51+
maxAlignment: 1
52+
machineFunctionInfo: {}
53+
body: |
54+
bb.0 (%ir-block.0):
55+
; CHECK-LABEL: name: foo_nopic
56+
; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
57+
; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
58+
; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
59+
; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
60+
; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
61+
; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
62+
; CHECK-NEXT: RET 0, $eax
63+
%0:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
64+
%1:gr32 = ADD32rm %0, $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
65+
TEST32rr %1, %1, implicit-def $eflags
66+
%2:gr8 = SETCCr 4, implicit $eflags
67+
%3:gr32 = MOVZX32rr8 killed %2
68+
$eax = COPY %3
69+
RET 0, $eax
70+
71+
...
72+
---
73+
name: foo_pic
74+
alignment: 16
75+
tracksRegLiveness: true
76+
registers:
77+
- { id: 0, class: gr32_nosp }
78+
- { id: 1, class: gr32 }
79+
- { id: 2, class: gr32 }
80+
- { id: 3, class: gr8 }
81+
- { id: 4, class: gr32 }
82+
- { id: 5, class: gr32 }
83+
frameInfo:
84+
maxAlignment: 1
85+
machineFunctionInfo: {}
86+
body: |
87+
bb.0 (%ir-block.0):
88+
; CHECK-LABEL: name: foo_pic
89+
; CHECK: [[MOVPC32r:%[0-9]+]]:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
90+
; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:gr32_nosp = ADD32ri [[MOVPC32r]], target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
91+
; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
92+
; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[ADD32ri]], 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
93+
; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
94+
; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
95+
; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
96+
; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
97+
; CHECK-NEXT: RET 0, $eax
98+
%5:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
99+
%0:gr32_nosp = ADD32ri %5, target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
100+
%1:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
101+
%2:gr32 = ADD32rm %1, %0, 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
102+
TEST32rr %2, %2, implicit-def $eflags
103+
%3:gr8 = SETCCr 4, implicit $eflags
104+
%4:gr32 = MOVZX32rr8 killed %3
105+
$eax = COPY %4
106+
RET 0, $eax
107+
...
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
3+
4+
# Linkers may change `addq xx@GOTTPOFF, %reg` to `leaq OFFSET(%reg), %reg`,
5+
# so we must not depend upon the EFLAGS output. Verify that the TEST
6+
# instruction won't be folded into the ADD.
7+
8+
# NOTE: the IR will no longer actually produce the input MIR after
9+
# llvm.threadlocal.address intrinsic is annotated as having a nonnull
10+
# result.
11+
12+
--- |
13+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
14+
target triple = "x86_64-unknown-linux-gnu"
15+
16+
@x = external thread_local(initialexec) global i32, align 4
17+
18+
define i32 @foo() {
19+
%1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
20+
%cmp = icmp eq ptr %1, null
21+
%zext = zext i1 %cmp to i32
22+
ret i32 %zext
23+
}
24+
25+
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
26+
declare ptr @llvm.threadlocal.address.p0(ptr) #0
27+
28+
attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
29+
30+
...
31+
---
32+
name: foo
33+
alignment: 16
34+
tracksRegLiveness: true
35+
registers:
36+
- { id: 0, class: gr64 }
37+
- { id: 1, class: gr64 }
38+
- { id: 2, class: gr8 }
39+
- { id: 3, class: gr32 }
40+
frameInfo:
41+
maxAlignment: 1
42+
machineFunctionInfo: {}
43+
body: |
44+
bb.0 (%ir-block.0):
45+
; CHECK-LABEL: name: foo
46+
; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
47+
; CHECK-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[MOV64rm]], $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
48+
; CHECK-NEXT: TEST64rr [[ADD64rm]], [[ADD64rm]], implicit-def $eflags
49+
; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
50+
; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
51+
; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
52+
; CHECK-NEXT: RET 0, $eax
53+
%0:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
54+
%1:gr64 = ADD64rm %0, $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
55+
TEST64rr %1, %1, implicit-def $eflags
56+
%2:gr8 = SETCCr 4, implicit $eflags
57+
%3:gr32 = MOVZX32rr8 killed %2
58+
$eax = COPY %3
59+
RET 0, $eax
60+
61+
...

0 commit comments

Comments
 (0)