Skip to content

Commit 48a21e6

Browse files
authored
[RISCV] Fix a correctness issue in optimizeCondBranch. Prevent optimizing compare with x0. NFC (#145440)
We were incorrectly changing -1 to 0 for unsigned compares in case 1. The comment incorrectly said UINT64_MAX is bigger than INT64_MAX, but we were doing a signed compare and UINT64_MAX is smaller than INT64_MAX in signed space. Prevent changing 0 constants since we can use x0. The test cases for these are contrived to use addi rd, x0, 0. We're more likely to have a COPY from x0 which we already don't optimize for other reasons. Check if registers are virtual before calling hasOneUse. The use count for physical registers is meaningless.
1 parent 9702d37 commit 48a21e6

File tree

2 files changed

+347
-30
lines changed

2 files changed

+347
-30
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1449,36 +1449,45 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
14491449
return Register();
14501450
};
14511451

1452-
if (isFromLoadImm(MRI, LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
1453-
// Might be case 1.
1454-
// Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1455-
// to worry about unsigned overflow here)
1456-
if (C0 < INT64_MAX)
1457-
if (Register RegZ = searchConst(C0 + 1)) {
1458-
reverseBranchCondition(Cond);
1459-
Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
1460-
Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1461-
// We might extend the live range of Z, clear its kill flag to
1462-
// account for this.
1463-
MRI.clearKillFlags(RegZ);
1464-
modifyBranch();
1465-
return true;
1466-
}
1467-
} else if (isFromLoadImm(MRI, RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
1468-
// Might be case 2.
1469-
// For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1470-
// when C0 is zero.
1471-
if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1472-
if (Register RegZ = searchConst(C0 - 1)) {
1473-
reverseBranchCondition(Cond);
1474-
Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1475-
Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
1476-
// We might extend the live range of Z, clear its kill flag to
1477-
// account for this.
1478-
MRI.clearKillFlags(RegZ);
1479-
modifyBranch();
1480-
return true;
1481-
}
1452+
// Might be case 1.
1453+
// Don't change 0 to 1 since we can use x0.
1454+
// For unsigned cases changing -1U to 0 would be incorrect.
1455+
// The incorrect case for signed would be INT_MAX, but isFromLoadImm can't
1456+
// return that.
1457+
if (isFromLoadImm(MRI, LHS, C0) && C0 != 0 && LHS.getReg().isVirtual() &&
1458+
MRI.hasOneUse(LHS.getReg()) &&
1459+
(CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT || C0 != -1)) {
1460+
assert(isInt<12>(C0) && "Unexpected immediate");
1461+
if (Register RegZ = searchConst(C0 + 1)) {
1462+
reverseBranchCondition(Cond);
1463+
Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
1464+
Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1465+
// We might extend the live range of Z, clear its kill flag to
1466+
// account for this.
1467+
MRI.clearKillFlags(RegZ);
1468+
modifyBranch();
1469+
return true;
1470+
}
1471+
}
1472+
1473+
// Might be case 2.
1474+
// For signed cases we don't want to change 0 since we can use x0.
1475+
// For unsigned cases changing 0 to -1U would be incorrect.
1476+
// The incorrect case for signed would be INT_MIN, but isFromLoadImm can't
1477+
// return that.
1478+
if (isFromLoadImm(MRI, RHS, C0) && C0 != 0 && RHS.getReg().isVirtual() &&
1479+
MRI.hasOneUse(RHS.getReg())) {
1480+
assert(isInt<12>(C0) && "Unexpected immediate");
1481+
if (Register RegZ = searchConst(C0 - 1)) {
1482+
reverseBranchCondition(Cond);
1483+
Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1484+
Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
1485+
// We might extend the live range of Z, clear its kill flag to
1486+
// account for this.
1487+
MRI.clearKillFlags(RegZ);
1488+
modifyBranch();
1489+
return true;
1490+
}
14821491
}
14831492

14841493
return false;

llvm/test/CodeGen/RISCV/branch-opt.mir

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,74 @@
1818
ret void
1919
}
2020

21+
define void @ule_negone(ptr %a, i32 signext %b, ptr %c, ptr %d) {
22+
store i32 0, ptr %a
23+
%p = icmp ule i32 %b, -1
24+
br i1 %p, label %block1, label %block2
25+
26+
block1: ; preds = %0
27+
store i32 %b, ptr %c
28+
br label %end_block
29+
30+
block2: ; preds = %0
31+
store i32 87, ptr %d
32+
br label %end_block
33+
34+
end_block: ; preds = %block2, %block1
35+
ret void
36+
}
37+
38+
define void @ult_zero(ptr %a, i32 signext %b, ptr %c, ptr %d) {
39+
store i32 -1, ptr %a
40+
%p = icmp ult i32 %b, 0
41+
br i1 %p, label %block1, label %block2
42+
43+
block1: ; preds = %0
44+
store i32 %b, ptr %c
45+
br label %end_block
46+
47+
block2: ; preds = %0
48+
store i32 87, ptr %d
49+
br label %end_block
50+
51+
end_block: ; preds = %block2, %block1
52+
ret void
53+
}
54+
55+
define void @sle_zero(ptr %a, i32 signext %b, ptr %c, ptr %d) {
56+
store i32 1, ptr %a
57+
%p = icmp sle i32 %b, 0
58+
br i1 %p, label %block1, label %block2
59+
60+
block1: ; preds = %0
61+
store i32 %b, ptr %c
62+
br label %end_block
63+
64+
block2: ; preds = %0
65+
store i32 87, ptr %d
66+
br label %end_block
67+
68+
end_block: ; preds = %block2, %block1
69+
ret void
70+
}
71+
72+
define void @slt_zero(ptr %a, i32 signext %b, ptr %c, ptr %d) {
73+
store i32 -1, ptr %a
74+
%p = icmp slt i32 %b, 0
75+
br i1 %p, label %block1, label %block2
76+
77+
block1: ; preds = %0
78+
store i32 %b, ptr %c
79+
br label %end_block
80+
81+
block2: ; preds = %0
82+
store i32 87, ptr %d
83+
br label %end_block
84+
85+
end_block: ; preds = %block2, %block1
86+
ret void
87+
}
88+
2189
declare void @bar(...)
2290

2391
...
@@ -66,3 +134,243 @@ body: |
66134
PseudoRET
67135
68136
...
137+
---
138+
name: ule_negone
139+
tracksRegLiveness: true
140+
body: |
141+
; CHECK-LABEL: name: ule_negone
142+
; CHECK: bb.0:
143+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
144+
; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13
145+
; CHECK-NEXT: {{ $}}
146+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13
147+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12
148+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11
149+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
150+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 0
151+
; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32))
152+
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, -1
153+
; CHECK-NEXT: BLTU killed [[ADDI1]], [[COPY2]], %bb.2
154+
; CHECK-NEXT: PseudoBR %bb.1
155+
; CHECK-NEXT: {{ $}}
156+
; CHECK-NEXT: bb.1:
157+
; CHECK-NEXT: successors: %bb.3(0x80000000)
158+
; CHECK-NEXT: {{ $}}
159+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]]
160+
; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32))
161+
; CHECK-NEXT: PseudoBR %bb.3
162+
; CHECK-NEXT: {{ $}}
163+
; CHECK-NEXT: bb.2:
164+
; CHECK-NEXT: successors: %bb.3(0x80000000)
165+
; CHECK-NEXT: {{ $}}
166+
; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87
167+
; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32))
168+
; CHECK-NEXT: {{ $}}
169+
; CHECK-NEXT: bb.3:
170+
; CHECK-NEXT: PseudoRET
171+
bb.0:
172+
successors: %bb.1, %bb.2
173+
liveins: $x10, $x11, $x12, $x13
174+
175+
%3:gpr = COPY $x13
176+
%2:gpr = COPY $x12
177+
%1:gpr = COPY $x11
178+
%0:gpr = COPY $x10
179+
%5:gpr = ADDI $x0, 0
180+
SW killed %5, %0, 0 :: (store (s32))
181+
%6:gpr = ADDI $x0, -1
182+
BLTU killed %6, %1, %bb.2
183+
PseudoBR %bb.1
184+
185+
bb.1:
186+
%4:gpr = COPY %1
187+
SW %4, %2, 0 :: (store (s32))
188+
PseudoBR %bb.3
189+
190+
bb.2:
191+
%7:gpr = ADDI $x0, 87
192+
SW killed %7, %3, 0 :: (store (s32))
193+
194+
bb.3:
195+
PseudoRET
196+
...
197+
---
198+
name: ult_zero
199+
tracksRegLiveness: true
200+
body: |
201+
; CHECK-LABEL: name: ult_zero
202+
; CHECK: bb.0:
203+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
204+
; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13
205+
; CHECK-NEXT: {{ $}}
206+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13
207+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12
208+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11
209+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
210+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1
211+
; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32))
212+
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0
213+
; CHECK-NEXT: BLTU [[COPY2]], killed [[ADDI1]], %bb.2
214+
; CHECK-NEXT: PseudoBR %bb.1
215+
; CHECK-NEXT: {{ $}}
216+
; CHECK-NEXT: bb.1:
217+
; CHECK-NEXT: successors: %bb.3(0x80000000)
218+
; CHECK-NEXT: {{ $}}
219+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]]
220+
; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32))
221+
; CHECK-NEXT: PseudoBR %bb.3
222+
; CHECK-NEXT: {{ $}}
223+
; CHECK-NEXT: bb.2:
224+
; CHECK-NEXT: successors: %bb.3(0x80000000)
225+
; CHECK-NEXT: {{ $}}
226+
; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87
227+
; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32))
228+
; CHECK-NEXT: {{ $}}
229+
; CHECK-NEXT: bb.3:
230+
; CHECK-NEXT: PseudoRET
231+
bb.0:
232+
successors: %bb.1, %bb.2
233+
liveins: $x10, $x11, $x12, $x13
234+
235+
%3:gpr = COPY $x13
236+
%2:gpr = COPY $x12
237+
%1:gpr = COPY $x11
238+
%0:gpr = COPY $x10
239+
%5:gpr = ADDI $x0, -1
240+
SW killed %5, %0, 0 :: (store (s32))
241+
%6:gpr = ADDI $x0, 0
242+
BLTU %1, killed %6, %bb.2
243+
PseudoBR %bb.1
244+
245+
bb.1:
246+
%4:gpr = COPY %1
247+
SW %4, %2, 0 :: (store (s32))
248+
PseudoBR %bb.3
249+
250+
bb.2:
251+
%7:gpr = ADDI $x0, 87
252+
SW killed %7, %3, 0 :: (store (s32))
253+
254+
bb.3:
255+
PseudoRET
256+
...
257+
---
258+
name: sle_zero
259+
tracksRegLiveness: true
260+
body: |
261+
; CHECK-LABEL: name: sle_zero
262+
; CHECK: bb.0:
263+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
264+
; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13
265+
; CHECK-NEXT: {{ $}}
266+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13
267+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12
268+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11
269+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
270+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
271+
; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32))
272+
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0
273+
; CHECK-NEXT: BLT killed [[ADDI1]], [[COPY2]], %bb.2
274+
; CHECK-NEXT: PseudoBR %bb.1
275+
; CHECK-NEXT: {{ $}}
276+
; CHECK-NEXT: bb.1:
277+
; CHECK-NEXT: successors: %bb.3(0x80000000)
278+
; CHECK-NEXT: {{ $}}
279+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]]
280+
; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32))
281+
; CHECK-NEXT: PseudoBR %bb.3
282+
; CHECK-NEXT: {{ $}}
283+
; CHECK-NEXT: bb.2:
284+
; CHECK-NEXT: successors: %bb.3(0x80000000)
285+
; CHECK-NEXT: {{ $}}
286+
; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87
287+
; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32))
288+
; CHECK-NEXT: {{ $}}
289+
; CHECK-NEXT: bb.3:
290+
; CHECK-NEXT: PseudoRET
291+
bb.0:
292+
successors: %bb.1, %bb.2
293+
liveins: $x10, $x11, $x12, $x13
294+
295+
%3:gpr = COPY $x13
296+
%2:gpr = COPY $x12
297+
%1:gpr = COPY $x11
298+
%0:gpr = COPY $x10
299+
%5:gpr = ADDI $x0, 1
300+
SW killed %5, %0, 0 :: (store (s32))
301+
%6:gpr = ADDI $x0, 0
302+
BLT killed %6, %1, %bb.2
303+
PseudoBR %bb.1
304+
305+
bb.1:
306+
%4:gpr = COPY %1
307+
SW %4, %2, 0 :: (store (s32))
308+
PseudoBR %bb.3
309+
310+
bb.2:
311+
%7:gpr = ADDI $x0, 87
312+
SW killed %7, %3, 0 :: (store (s32))
313+
314+
bb.3:
315+
PseudoRET
316+
...
317+
---
318+
name: slt_zero
319+
tracksRegLiveness: true
320+
body: |
321+
; CHECK-LABEL: name: slt_zero
322+
; CHECK: bb.0:
323+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
324+
; CHECK-NEXT: liveins: $x10, $x11, $x12, $x13
325+
; CHECK-NEXT: {{ $}}
326+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13
327+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12
328+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11
329+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
330+
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, -1
331+
; CHECK-NEXT: SW killed [[ADDI]], [[COPY3]], 0 :: (store (s32))
332+
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0
333+
; CHECK-NEXT: BLT [[COPY2]], killed [[ADDI1]], %bb.2
334+
; CHECK-NEXT: PseudoBR %bb.1
335+
; CHECK-NEXT: {{ $}}
336+
; CHECK-NEXT: bb.1:
337+
; CHECK-NEXT: successors: %bb.3(0x80000000)
338+
; CHECK-NEXT: {{ $}}
339+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]]
340+
; CHECK-NEXT: SW [[COPY4]], [[COPY1]], 0 :: (store (s32))
341+
; CHECK-NEXT: PseudoBR %bb.3
342+
; CHECK-NEXT: {{ $}}
343+
; CHECK-NEXT: bb.2:
344+
; CHECK-NEXT: successors: %bb.3(0x80000000)
345+
; CHECK-NEXT: {{ $}}
346+
; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 87
347+
; CHECK-NEXT: SW killed [[ADDI2]], [[COPY]], 0 :: (store (s32))
348+
; CHECK-NEXT: {{ $}}
349+
; CHECK-NEXT: bb.3:
350+
; CHECK-NEXT: PseudoRET
351+
bb.0:
352+
successors: %bb.1, %bb.2
353+
liveins: $x10, $x11, $x12, $x13
354+
355+
%3:gpr = COPY $x13
356+
%2:gpr = COPY $x12
357+
%1:gpr = COPY $x11
358+
%0:gpr = COPY $x10
359+
%5:gpr = ADDI $x0, -1
360+
SW killed %5, %0, 0 :: (store (s32))
361+
%6:gpr = ADDI $x0, 0
362+
BLT %1, killed %6, %bb.2
363+
PseudoBR %bb.1
364+
365+
bb.1:
366+
%4:gpr = COPY %1
367+
SW %4, %2, 0 :: (store (s32))
368+
PseudoBR %bb.3
369+
370+
bb.2:
371+
%7:gpr = ADDI $x0, 87
372+
SW killed %7, %3, 0 :: (store (s32))
373+
374+
bb.3:
375+
PseudoRET
376+
...

0 commit comments

Comments
 (0)