Skip to content

Commit ed87f0a

Browse files
authored
[AMDGPU] Visit all PHIs in each call to optimizeLiveType (#147522)
Make the Visited set a local variable; otherwise we can reject a PHI (one that does not have a zeroinitializer constant) but still mark it as visited, and the rest of the function then thinks the PHI is OK when it isn't. This is a bit crude, but it's the only fix that consistently worked in my testing. Fixes SWDEV-541767.
1 parent cebfb75 commit ed87f0a

File tree

2 files changed

+73
-2
lines changed

2 files changed

+73
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ class LiveRegOptimizer {
7979

8080
/// The scalar type to convert to
8181
Type *const ConvertToScalar;
82-
/// The set of visited Instructions
83-
SmallPtrSet<Instruction *, 4> Visited;
8482
/// Map of Value -> Converted Value
8583
ValueToValueMap ValMap;
8684
/// Map containing conversions from Optimal Type -> Original Type per BB.
@@ -288,6 +286,7 @@ bool LiveRegOptimizer::optimizeLiveType(
288286
SmallPtrSet<PHINode *, 4> PhiNodes;
289287
SmallPtrSet<Instruction *, 4> Defs;
290288
SmallPtrSet<Instruction *, 4> Uses;
289+
SmallPtrSet<Instruction *, 4> Visited;
291290

292291
Worklist.push_back(cast<Instruction>(I));
293292
while (!Worklist.empty()) {
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-late-codegenprepare %s | FileCheck %s
3+
4+
; This crashed because the PHI with a splat was rejected, but then we marked the PHI
5+
; as visited and tried to convert one of its users afterwards.
6+
7+
define amdgpu_kernel void @widget(ptr %arg, ptr %arg1, ptr %arg2) {
8+
; CHECK-LABEL: define amdgpu_kernel void @widget(
9+
; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) {
10+
; CHECK-NEXT: [[BB:.*]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG]], align 4
12+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
13+
; CHECK-NEXT: [[ARG1_LOAD:%.*]] = load <4 x i8>, ptr [[ARG1]], align 4
14+
; CHECK-NEXT: [[ARG2_LOAD:%.*]] = load i64, ptr [[ARG2]], align 4
15+
; CHECK-NEXT: br label %[[BB_1:.*]]
16+
; CHECK: [[BB_1]]:
17+
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ null, %[[BB]] ], [ [[ARG1]], %[[BB_6:.*]] ]
18+
; CHECK-NEXT: [[PHI4:%.*]] = phi <4 x i8> [ splat (i8 1), %[[BB]] ], [ [[PHI15:%.*]], %[[BB_6]] ]
19+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_2:.*]], label %[[BB_6]]
20+
; CHECK: [[BB_2]]:
21+
; CHECK-NEXT: [[PHI7:%.*]] = phi <4 x i8> [ [[PHI13:%.*]], %[[BB_5:.*]] ], [ [[PHI4]], %[[BB_1]] ]
22+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_4:.*]], label %[[BB_5]]
23+
; CHECK: [[BB_3:.*]]:
24+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_4]], label %[[BB_EXIT:.*]]
25+
; CHECK: [[BB_4]]:
26+
; CHECK-NEXT: [[PHI11:%.*]] = phi <4 x i8> [ [[PHI7]], %[[BB_3]] ], [ zeroinitializer, %[[BB_2]] ]
27+
; CHECK-NEXT: store <4 x i8> [[PHI11]], ptr [[PHI]], align 1
28+
; CHECK-NEXT: br label %[[BB_5]]
29+
; CHECK: [[BB_5]]:
30+
; CHECK-NEXT: [[PHI13]] = phi <4 x i8> [ zeroinitializer, %[[BB_4]] ], [ [[PHI7]], %[[BB_2]] ]
31+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_2]], label %[[BB_6]]
32+
; CHECK: [[BB_6]]:
33+
; CHECK-NEXT: [[PHI15]] = phi <4 x i8> [ [[ARG1_LOAD]], %[[BB_1]] ], [ zeroinitializer, %[[BB_5]] ]
34+
; CHECK-NEXT: br label %[[BB_1]]
35+
; CHECK: [[BB_EXIT]]:
36+
; CHECK-NEXT: ret void
37+
;
38+
; Entry block: %ld.trunc is the single i1 condition used by every conditional
; branch below, and %arg1.load is the only loaded <4 x i8> value (it feeds
; %phi15). %arg2.load has no users in this function.
bb:
39+
%ld = load i32, ptr %arg, align 4
40+
%ld.trunc = trunc i32 %ld to i1
41+
%arg1.load = load <4 x i8>, ptr %arg1, align 4
42+
%arg2.load = load i64, ptr %arg2, align 4
43+
br label %bb.1
44+
45+
bb.1:
46+
%phi = phi ptr [ null, %bb ], [ %arg1, %bb.6 ]
47+
; %phi4 takes a non-zero splat constant on the entry edge; this is the
; "PHI with a splat" that the comment above the function describes as rejected.
%phi4 = phi <4 x i8> [ splat (i8 1), %bb ], [ %phi15, %bb.6 ]
48+
br i1 %ld.trunc, label %bb.2, label %bb.6
49+
50+
bb.2:
51+
; %phi7 is a user of %phi4 and is itself used by %phi11 and %phi13.
%phi7 = phi <4 x i8> [ %phi13, %bb.5 ], [ %phi4, %bb.1 ]
52+
br i1 %ld.trunc, label %bb.4, label %bb.5
53+
54+
; bb.3 is not the target of any branch in this function, so neither it nor
; bb.exit (which is branched to only from bb.3) has a path from the entry block.
bb.3:
55+
br i1 %ld.trunc, label %bb.4, label %bb.exit
56+
57+
bb.4:
58+
%phi11 = phi <4 x i8> [ %phi7, %bb.3 ], [ zeroinitializer, %bb.2 ]
59+
; The only store in the test: writes the <4 x i8> PHI value through the
; pointer PHI %phi.
store <4 x i8> %phi11, ptr %phi, align 1
60+
br label %bb.5
61+
62+
bb.5:
63+
%phi13 = phi <4 x i8> [ zeroinitializer, %bb.4 ], [ %phi7, %bb.2 ]
64+
br i1 %ld.trunc, label %bb.2, label %bb.6
65+
66+
bb.6:
67+
; %phi15 feeds back into %phi4 along the bb.6 -> bb.1 edge, closing the
; PHI cycle %phi4 -> %phi7 -> %phi13 -> %phi7 / %phi15 -> %phi4.
%phi15 = phi <4 x i8> [ %arg1.load, %bb.1 ], [ zeroinitializer, %bb.5 ]
68+
br label %bb.1
69+
70+
bb.exit:
71+
ret void
72+
}

0 commit comments

Comments
 (0)