Skip to content

Commit c39d661

Browse files
committed
[LLVM] Add patches for PPC knownbits and AArch64 globalisel
1 parent 8f6432e commit c39d661

File tree

3 files changed

+188
-1
lines changed

3 files changed

+188
-1
lines changed

deps/llvm.mk

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,8 +534,10 @@ endif
534534
$(eval $(call LLVM_PATCH,llvm-11-D85313-debuginfo-empty-arange)) # remove for LLVM 12
535535
$(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LLVM 12
536536
$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # upstreamed as D92210
537-
$(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp))
537+
$(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12
538538
$(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12
539+
$(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits))
540+
$(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as))
539541
endif # LLVM_VER 11.0
540542

541543

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
From 8bec64e2c0386934d4e38344907f0f4b0de4d8a3 Mon Sep 17 00:00:00 2001
2+
From: Valentin Churavy <v.churavy@gmail.com>
3+
Date: Tue, 15 Dec 2020 09:59:18 -0500
4+
Subject: [PATCH] [PowerPC] KnownBits should be constant when performing
5+
non-sign comparison
6+
7+
In `PPCTargetLowering::DAGCombineTruncBoolExt`, when checking if it's correct to perform the transformation for non-sign comparison, as the comment says
8+
```
9+
// This is neither a signed nor an unsigned comparison, just make sure
10+
// that the high bits are equal.
11+
```
12+
the original check
13+
```
14+
if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
15+
return SDValue();
16+
```
17+
is not strong enough. For example,
18+
```
19+
Op1Known = 111x000x;
20+
Op2Known = 111x000x;
21+
```
22+
Bit 4, in addition to bit 0, is still unknown in both operands, so it may differ between them and affect the final result.
23+
24+
This patch fixes https://bugs.llvm.org/show_bug.cgi?id=48388.
25+
26+
Differential Revision: https://reviews.llvm.org/D93092
27+
---
28+
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +-
29+
llvm/test/CodeGen/PowerPC/pr48388.ll | 42 +++++++++++++++++++++
30+
2 files changed, 44 insertions(+), 1 deletion(-)
31+
create mode 100644 llvm/test/CodeGen/PowerPC/pr48388.ll
32+
33+
diff --git llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp
34+
index f54f1673526d..76b32db44656 100644
35+
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
36+
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
37+
@@ -13291,7 +13291,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
38+
Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
39+
Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
40+
41+
- if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
42+
+ if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
43+
+ Op1Known.getConstant() != Op2Known.getConstant())
44+
return SDValue();
45+
}
46+
}
47+
diff --git llvm/test/CodeGen/PowerPC/pr48388.ll llvm/test/CodeGen/PowerPC/pr48388.ll
48+
new file mode 100644
49+
index 000000000000..138fb6147832
50+
--- /dev/null
51+
+++ llvm/test/CodeGen/PowerPC/pr48388.ll
52+
@@ -0,0 +1,42 @@
53+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
54+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -ppc-asm-full-reg-names \
55+
+; RUN: < %s | FileCheck %s
56+
+
57+
+define i64 @julia_div_i64(i64 %0, i64 %1) local_unnamed_addr #0 {
58+
+; CHECK-LABEL: julia_div_i64:
59+
+; CHECK: # %bb.0: # %entry
60+
+; CHECK-NEXT: divd r6, r3, r4
61+
+; CHECK-NEXT: li r5, 32767
62+
+; CHECK-NEXT: sldi r5, r5, 32
63+
+; CHECK-NEXT: oris r7, r5, 40069
64+
+; CHECK-NEXT: oris r5, r5, 40079
65+
+; CHECK-NEXT: cmpdi r3, 0
66+
+; CHECK-NEXT: ori r7, r7, 13456
67+
+; CHECK-NEXT: ori r5, r5, 65264
68+
+; CHECK-NEXT: iselgt r9, r5, r7
69+
+; CHECK-NEXT: cmpdi r4, 0
70+
+; CHECK-NEXT: mulld r8, r6, r4
71+
+; CHECK-NEXT: iselgt r4, r5, r7
72+
+; CHECK-NEXT: xor r4, r9, r4
73+
+; CHECK-NEXT: cntlzd r4, r4
74+
+; CHECK-NEXT: rldicl r4, r4, 58, 63
75+
+; CHECK-NEXT: xor r3, r8, r3
76+
+; CHECK-NEXT: addic r5, r3, -1
77+
+; CHECK-NEXT: subfe r3, r5, r3
78+
+; CHECK-NEXT: and r3, r4, r3
79+
+; CHECK-NEXT: add r3, r6, r3
80+
+; CHECK-NEXT: blr
81+
+entry:
82+
+ %2 = sdiv i64 %0, %1
83+
+ %3 = icmp sgt i64 %0, 0
84+
+ %4 = icmp sgt i64 %1, 0
85+
+ %5 = select i1 %3, i64 140735820070640, i64 140735819363472
86+
+ %6 = select i1 %4, i64 140735820070640, i64 140735819363472
87+
+ %7 = icmp eq i64 %5, %6
88+
+ %8 = mul i64 %2, %1
89+
+ %9 = icmp ne i64 %8, %0
90+
+ %10 = and i1 %7, %9
91+
+ %11 = zext i1 %10 to i64
92+
+ %12 = add i64 %2, %11
93+
+ ret i64 %12
94+
+}
95+
--
96+
2.29.2
97+
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
From af809ec100ef60cdeeef776e54c123e4fc8f1071 Mon Sep 17 00:00:00 2001
2+
From: Jameson Nash <vtjnash@gmail.com>
3+
Date: Tue, 15 Dec 2020 10:04:08 -0500
4+
Subject: [PATCH] GlobalISel: remove assert that memcpy Src and Dst addrspace
5+
must be identical
6+
7+
The LangRef does not require these arguments to have the same type.
8+
9+
Differential Revision: https://reviews.llvm.org/D93154
10+
---
11+
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 27 +++++++++++--------
12+
1 file changed, 16 insertions(+), 11 deletions(-)
13+
14+
diff --git llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
15+
index 79f74a47d83c..7bd6f8f52c8b 100644
16+
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
17+
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
18+
@@ -1240,7 +1240,6 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
19+
// of that value loaded. This can result in a sequence of loads and stores
20+
// mixed types, depending on what the target specifies as good types to use.
21+
unsigned CurrOffset = 0;
22+
- LLT PtrTy = MRI.getType(Src);
23+
unsigned Size = KnownLen;
24+
for (auto CopyTy : MemOps) {
25+
// Issuing an unaligned load / store pair that overlaps with the previous
26+
@@ -1258,15 +1257,20 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
27+
Register LoadPtr = Src;
28+
Register Offset;
29+
if (CurrOffset != 0) {
30+
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
31+
- .getReg(0);
32+
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
33+
+ LLT LoadTy = MRI.getType(Src);
34+
+ Offset =
35+
+ MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset)
36+
+ .getReg(0);
37+
+ LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0);
38+
}
39+
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
40+
41+
// Create the store.
42+
- Register StorePtr =
43+
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
44+
+ Register StorePtr = Dst;
45+
+ if (CurrOffset != 0) {
46+
+ LLT StoreTy = MRI.getType(Dst);
47+
+ StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0);
48+
+ }
49+
MIB.buildStore(LdVal, StorePtr, *StoreMMO);
50+
CurrOffset += CopyTy.getSizeInBytes();
51+
Size -= CopyTy.getSizeInBytes();
52+
@@ -1343,7 +1347,6 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
53+
// Apart from that, this loop is pretty much doing the same thing as the
54+
// memcpy codegen function.
55+
unsigned CurrOffset = 0;
56+
- LLT PtrTy = MRI.getType(Src);
57+
SmallVector<Register, 16> LoadVals;
58+
for (auto CopyTy : MemOps) {
59+
// Construct MMO for the load.
60+
@@ -1353,9 +1356,10 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
61+
// Create the load.
62+
Register LoadPtr = Src;
63+
if (CurrOffset != 0) {
64+
+ LLT LoadTy = MRI.getType(Src);
65+
auto Offset =
66+
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
67+
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
68+
+ MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset);
69+
+ LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0);
70+
}
71+
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
72+
CurrOffset += CopyTy.getSizeInBytes();
73+
@@ -1370,9 +1374,10 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
74+
75+
Register StorePtr = Dst;
76+
if (CurrOffset != 0) {
77+
+ LLT StoreTy = MRI.getType(Dst);
78+
auto Offset =
79+
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
80+
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
81+
+ MIB.buildConstant(LLT::scalar(StoreTy.getSizeInBits()), CurrOffset);
82+
+ StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0);
83+
}
84+
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
85+
CurrOffset += CopyTy.getSizeInBytes();
86+
--
87+
2.29.2
88+

0 commit comments

Comments
 (0)