-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[AMDGPU] Visit all PHIs in each call to optimizeLiveType #147522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Make the Visited set a local variable, otherwise we can reject a PHI (those that do not have a zeroinitializer constant) but mark it as visited, and the rest of the function thinks the PHI is ok when it isn't. This is a bit crude but it's the only fix that consistently worked in my testing. Fixes SWDEV-541767
@llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh) ChangesMake the Visited set a local variable, otherwise we can reject a PHI Fixes SWDEV-541767 Full diff: https://github.com/llvm/llvm-project/pull/147522.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index df976cf3f7fdb..523c66c72273c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -79,8 +79,6 @@ class LiveRegOptimizer {
/// The scalar type to convert to
Type *const ConvertToScalar;
- /// The set of visited Instructions
- SmallPtrSet<Instruction *, 4> Visited;
/// Map of Value -> Converted Value
ValueToValueMap ValMap;
/// Map of containing conversions from Optimal Type -> Original Type per BB.
@@ -288,6 +286,7 @@ bool LiveRegOptimizer::optimizeLiveType(
SmallPtrSet<PHINode *, 4> PhiNodes;
SmallPtrSet<Instruction *, 4> Defs;
SmallPtrSet<Instruction *, 4> Uses;
+ SmallPtrSet<Instruction *, 4> Visited;
Worklist.push_back(cast<Instruction>(I));
while (!Worklist.empty()) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
new file mode 100644
index 0000000000000..837b5e0ab5833
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare-crash-splat.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes="amdgpu-late-codegenprepare,verify" %s | FileCheck %s
+
+; This crashed because the PHI with a splat was rejected, but then we marked the PHI
+; as visited and tried to convert one of its user afterwards.
+
+define amdgpu_kernel void @widget(i1 %arg, <4 x i8> %arg1, i64 %arg2) {
+; CHECK-LABEL: define amdgpu_kernel void @widget(
+; CHECK-SAME: i1 [[ARG:%.*]], <4 x i8> [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: [[WIDGET_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; CHECK-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 36
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
+; CHECK-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 40
+; CHECK-NEXT: [[ARG1_LOAD:%.*]] = load <4 x i8>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 8
+; CHECK-NEXT: [[ARG2_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 44
+; CHECK-NEXT: [[ARG2_LOAD:%.*]] = load i64, ptr addrspace(4) [[ARG2_KERNARG_OFFSET]], align 4
+; CHECK-NEXT: br label %[[BB_3:.*]]
+; CHECK: [[BB_3]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ null, %[[BB]] ], [ [[GETELEMENTPTR:%.*]], %[[BB_14:.*]] ]
+; CHECK-NEXT: [[PHI4:%.*]] = phi <4 x i8> [ splat (i8 1), %[[BB]] ], [ [[PHI15:%.*]], %[[BB_14]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_6_PREHEADER:.*]], label %[[BB_5:.*]]
+; CHECK: [[BB_5]]:
+; CHECK-NEXT: br label %[[BB_14]]
+; CHECK: [[BB_6_PREHEADER]]:
+; CHECK-NEXT: br label %[[BB_6:.*]]
+; CHECK: [[BB_6]]:
+; CHECK-NEXT: [[PHI7:%.*]] = phi <4 x i8> [ [[PHI13:%.*]], %[[BB_12:.*]] ], [ [[PHI4]], %[[BB_6_PREHEADER]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_8:.*]], label %[[BB_12]]
+; CHECK: [[BB_8]]:
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_10:.*]], label %[[BB_9:.*]]
+; CHECK: [[BB_9]]:
+; CHECK-NEXT: br label %[[BB_10]]
+; CHECK: [[BB_10]]:
+; CHECK-NEXT: [[PHI11:%.*]] = phi <4 x i8> [ [[PHI7]], %[[BB_9]] ], [ zeroinitializer, %[[BB_8]] ]
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x i8> [[PHI11]], i64 0
+; CHECK-NEXT: store i8 [[EXTRACTELEMENT]], ptr addrspace(1) [[PHI]], align 1
+; CHECK-NEXT: br label %[[BB_12]]
+; CHECK: [[BB_12]]:
+; CHECK-NEXT: [[PHI13]] = phi <4 x i8> [ zeroinitializer, %[[BB_10]] ], [ [[PHI7]], %[[BB_6]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_6]], label %[[BB_14]]
+; CHECK: [[BB_14]]:
+; CHECK-NEXT: [[PHI15]] = phi <4 x i8> [ [[ARG1_LOAD]], %[[BB_5]] ], [ zeroinitializer, %[[BB_12]] ]
+; CHECK-NEXT: [[GETELEMENTPTR]] = getelementptr i8, ptr addrspace(1) [[PHI]], i64 [[ARG2_LOAD]]
+; CHECK-NEXT: br label %[[BB_3]]
+;
+bb:
+ %widget.kernarg.segment = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %arg.kernarg.offset.align.down = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 36
+ %0 = load i32, ptr addrspace(4) %arg.kernarg.offset.align.down, align 4
+ %1 = trunc i32 %0 to i1
+ %arg1.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 40
+ %arg1.load = load <4 x i8>, ptr addrspace(4) %arg1.kernarg.offset, align 8
+ %arg2.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 44
+ %arg2.load = load i64, ptr addrspace(4) %arg2.kernarg.offset, align 4
+ br label %bb.3
+
+bb.3: ; preds = %bb.14, %bb
+ %phi = phi ptr addrspace(1) [ null, %bb ], [ %getelementptr, %bb.14 ]
+ %phi4 = phi <4 x i8> [ splat (i8 1), %bb ], [ %phi15, %bb.14 ]
+ br i1 %1, label %bb.6.preheader, label %bb.5
+
+bb.5: ; preds = %bb.3
+ br label %bb.14
+
+bb.6.preheader: ; preds = %bb.3
+ br label %bb.6
+
+bb.6: ; preds = %bb.6.preheader, %bb.12
+ %phi7 = phi <4 x i8> [ %phi13, %bb.12 ], [ %phi4, %bb.6.preheader ]
+ br i1 %1, label %bb.8, label %bb.12
+
+bb.8: ; preds = %bb.6
+ br i1 %1, label %bb.10, label %bb.9
+
+bb.9: ; preds = %bb.8
+ br label %bb.10
+
+bb.10: ; preds = %bb.9, %bb.8
+ %phi11 = phi <4 x i8> [ %phi7, %bb.9 ], [ zeroinitializer, %bb.8 ]
+ %extractelement = extractelement <4 x i8> %phi11, i64 0
+ store i8 %extractelement, ptr addrspace(1) %phi, align 1
+ br label %bb.12
+
+bb.12: ; preds = %bb.10, %bb.6
+ %phi13 = phi <4 x i8> [ zeroinitializer, %bb.10 ], [ %phi7, %bb.6 ]
+ br i1 %1, label %bb.6, label %bb.14
+
+bb.14: ; preds = %bb.5, %bb.12
+ %phi15 = phi <4 x i8> [ %arg1.load, %bb.5 ], [ zeroinitializer, %bb.12 ]
+ %getelementptr = getelementptr i8, ptr addrspace(1) %phi, i64 %arg2.load
+ br label %bb.3
+}
|
Make the Visited set a local variable, otherwise we can reject a PHI
(those that do not have a zeroinitializer constant) but mark it as visited,
and the rest of the function thinks the PHI is ok when it isn't.
This is a bit crude but it's the only fix that consistently worked in my testing.
Fixes SWDEV-541767