diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d8e7215940ef7..5b529bcf995a7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11011,8 +11011,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; // If the user instruction is used for some reason in different // vectorized nodes - make it depend on index. + // If any vector node is a PHI node, this dependency might not work + // because of cycle dependencies, so disable it. if (TEUseEI.UserTE != UseEI.UserTE && - TEUseEI.UserTE->Idx < UseEI.UserTE->Idx) + (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx || + any_of( + VectorizableTree, + [](const std::unique_ptr<TreeEntry> &TE) { + return TE->State == TreeEntry::Vectorize && + TE->getOpcode() == Instruction::PHI; + }))) continue; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll index 5562291dbb6be..bf3f0c4df74e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll @@ -31,7 +31,7 @@ define void @test() { ; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0 -; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> +; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll index e5d7ad138b4de..28e0b06f69673 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll @@ -8,7 +8,7 @@ ; YAML: Function: test ; YAML: Args: ; YAML: - String: 'Stores SLP vectorized with cost ' -; YAML: - Cost: '-6' +; YAML: - Cost: '-3' ; YAML: - String: ' and with tree size ' ; YAML: - TreeSize: '14' ; YAML: ... diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll new file mode 100644 index 0000000000000..22e7e6a8e6624 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s + +define void @test(float %0) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer +; CHECK-NEXT: br label %[[BB6:.*]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]] +; CHECK-NEXT: br label %[[BB10:.*]] +; CHECK: [[BB9:.*]]: +; CHECK-NEXT: br label %[[BB10]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ] +; CHECK-NEXT: br label %[[BB12:.*]] +; CHECK: [[BB12]]: +; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0 +; 
CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]]) +; CHECK-NEXT: ret void +; + %2 = fdiv float 0.000000e+00, 0.000000e+00 + %3 = fdiv float 0.000000e+00, 0.000000e+00 + %4 = fdiv float %0, 0.000000e+00 + br label %5 + +5: + %6 = fmul float %4, 0.000000e+00 + %7 = fsub float 0.000000e+00, %6 + %8 = fmul float %3, 0.000000e+00 + %9 = fsub float 0.000000e+00, %8 + br label %11 + +10: + br label %11 + +11: + %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ] + %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ] + br label %14 + +14: + %15 = fmul float %2, 0.000000e+00 + %16 = fsub float %12, %15 + %17 = fmul float %4, 0.000000e+00 + %18 = fsub float %13, %17 + %19 = fadd float %16, %18 + %20 = call float @llvm.fabs.f32(float %19) + ret void +} +