From 7c6f2dd1649c4a0754f73af067676d8a362d4507 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 13 Feb 2025 14:19:51 -0800 Subject: [PATCH] [SLP] Check for PHI nodes (potentially cycles!) when checking dependencies When checking for dependencies for gather nodes with users with the same last instruction, we cannot rely on the index order if there is an (even potential!) cycle in the graph, which may cause the ordering to not work correctly and crash the compiler. Fixes #127128 (cherry picked from commit ac217ee389d63124432e5e6890851a678f7a676b) --- .../Transforms/Vectorize/SLPVectorizer.cpp | 10 +++- .../X86/delayed-gather-emission.ll | 2 +- .../X86/matching-gather-nodes-phi-users.ll | 2 +- .../SLPVectorizer/X86/phi-node-with-cycle.ll | 59 +++++++++++++++++++ 4 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d8e7215940ef7..5b529bcf995a7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11011,8 +11011,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; // If the user instruction is used for some reason in different // vectorized nodes - make it depend on index. + // If any vector node is PHI node, this dependency might not work + // because of cycle dependencies, so disable it. 
if (TEUseEI.UserTE != UseEI.UserTE && - TEUseEI.UserTE->Idx < UseEI.UserTE->Idx) + (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx || + any_of( + VectorizableTree, + [](const std::unique_ptr &TE) { + return TE->State == TreeEntry::Vectorize && + TE->getOpcode() == Instruction::PHI; + }))) continue; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll index 5562291dbb6be..bf3f0c4df74e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll @@ -31,7 +31,7 @@ define void @test() { ; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0 -; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll index e5d7ad138b4de..28e0b06f69673 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll @@ -8,7 +8,7 @@ ; YAML: Function: test ; YAML: Args: ; YAML: - String: 'Stores SLP vectorized with cost ' -; YAML: - Cost: '-6' +; YAML: - Cost: '-3' ; YAML: - String: ' and with tree size ' ; YAML: - TreeSize: '14' ; YAML: ... 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll new file mode 100644 index 0000000000000..22e7e6a8e6624 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s + +define void @test(float %0) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> , float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer +; CHECK-NEXT: br label %[[BB6:.*]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]] +; CHECK-NEXT: br label %[[BB10:.*]] +; CHECK: [[BB9:.*]]: +; CHECK-NEXT: br label %[[BB10]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ] +; CHECK-NEXT: br label %[[BB12:.*]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]]) +; CHECK-NEXT: ret void +; + %2 = fdiv float 0.000000e+00, 0.000000e+00 + %3 = fdiv float 0.000000e+00, 0.000000e+00 + %4 = fdiv float %0, 0.000000e+00 + br label %5 + +5: + %6 = 
fmul float %4, 0.000000e+00 + %7 = fsub float 0.000000e+00, %6 + %8 = fmul float %3, 0.000000e+00 + %9 = fsub float 0.000000e+00, %8 + br label %11 + +10: + br label %11 + +11: + %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ] + %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ] + br label %14 + +14: + %15 = fmul float %2, 0.000000e+00 + %16 = fsub float %12, %15 + %17 = fmul float %4, 0.000000e+00 + %18 = fsub float %13, %17 + %19 = fadd float %16, %18 + %20 = call float @llvm.fabs.f32(float %19) + ret void +} +