diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fee94cc167363..eb7fce106dba1 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3230,7 +3230,8 @@ class LLVM_ABI TargetLoweringBase {
   /// \p Mask is a mask value
   /// \p DeinterleaveRes is a list of deinterleaved results.
   virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
-                                      ArrayRef<Value *> DeinterleaveRes) const {
+                                      ArrayRef<Value *> DeinterleaveRes,
+                                      unsigned Factor) const {
     return false;
   }
 
@@ -3253,7 +3254,8 @@ class LLVM_ABI TargetLoweringBase {
   /// \p DeinterleaveValues contains the deinterleaved values.
   virtual bool
   lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
-                                   ArrayRef<Value *> DeinterleaveValues) const {
+                                   ArrayRef<Value *> DeinterleaveValues,
+                                   unsigned Factor) const {
     return false;
   }
 
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 9c4c86cebe7e5..04daa7656f74e 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -377,7 +377,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
     SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
     for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
       ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
-    if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues))
+    if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues, Factor))
       // If Extracts is not empty, tryReplaceExtracts made changes earlier.
       return !Extracts.empty() || BinOpShuffleChanged;
   } else {
@@ -670,9 +670,8 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     return false;
 
   const unsigned Factor = getIntrinsicFactor(DI);
-  if (!DI->hasNUses(Factor))
-    return false;
-  SmallVector<Value *, 8> DeinterleaveValues(Factor);
+  SmallVector<Value *, 8> DeinterleaveValues(Factor, nullptr);
+  Value *LastFactor = nullptr;
   for (auto *User : DI->users()) {
     auto *Extract = dyn_cast<ExtractValueInst>(User);
     if (!Extract || Extract->getNumIndices() != 1)
@@ -681,15 +680,19 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
     if (DeinterleaveValues[Idx])
       return false;
     DeinterleaveValues[Idx] = Extract;
+    LastFactor = Extract;
   }
 
+  if (!LastFactor)
+    return false;
+
   if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
     if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
       return false;
     // Check mask operand. Handle both all-true/false and interleaved mask.
     Value *WideMask = VPLoad->getOperand(1);
-    Value *Mask = getMask(WideMask, Factor,
-                          cast<VectorType>(DeinterleaveValues[0]->getType()));
+    Value *Mask =
+        getMask(WideMask, Factor, cast<VectorType>(LastFactor->getType()));
     if (!Mask)
       return false;
 
@@ -698,7 +701,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
 
     // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
     // TLI function to emit target-specific interleaved instruction.
-    if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, DeinterleaveValues))
+    if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, DeinterleaveValues, Factor))
       return false;
 
   } else {
@@ -710,12 +713,13 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
                       << " and factor = " << Factor << "\n");
 
     // Try and match this with target specific intrinsics.
-    if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
+    if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues, Factor))
       return false;
   }
 
   for (Value *V : DeinterleaveValues)
-    DeadInsts.insert(cast<Instruction>(V));
+    if (V)
+      DeadInsts.insert(cast<Instruction>(V));
   DeadInsts.insert(DI);
   // We now have a target-specific load, so delete the old one.
   DeadInsts.insert(cast<Instruction>(LoadedVal));
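Note: with the DI->hasNUses(Factor) check removed, lowerDeinterleaveIntrinsic now accepts deinterleave intrinsics whose results are only partially extracted; missing fields simply stay nullptr in DeinterleaveValues, which is why Factor is now passed to the TLI hooks explicitly instead of being recovered from the value list. A minimal IR sketch of the factor-2, one-field-live shape this enables (names are illustrative; here DeinterleaveValues becomes {nullptr, %f1} and LastFactor supplies the per-field vector type used by getMask):

    %vec = load <vscale x 32 x i8>, ptr %p
    %di = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
    ; Only field 1 is ever extracted; field 0 has no extractvalue user at all.
    %f1 = extractvalue {<vscale x 16 x i8>, <vscale x 16 x i8>} %di, 1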
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cdb68684b3856..30a9e66fec88d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17456,14 +17456,16 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
 }
 
 bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
-    LoadInst *LI, ArrayRef<Value *> DeinterleavedValues) const {
-  unsigned Factor = DeinterleavedValues.size();
+    LoadInst *LI, ArrayRef<Value *> DeinterleavedValues,
+    unsigned Factor) const {
   if (Factor != 2 && Factor != 4) {
     LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
     return false;
   }
 
-  VectorType *VTy = cast<VectorType>(DeinterleavedValues[0]->getType());
+  Value *FirstActive = *llvm::find_if(DeinterleavedValues,
+                                      [](Value *V) { return V != nullptr; });
+  VectorType *VTy = cast<VectorType>(FirstActive->getType());
 
   const DataLayout &DL = LI->getModule()->getDataLayout();
   bool UseScalable;
@@ -17512,8 +17514,10 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
       LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
     }
     // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
-    for (unsigned J = 0; J < Factor; ++J)
-      DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
+    for (unsigned J = 0; J < Factor; ++J) {
+      if (DeinterleavedValues[J])
+        DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
+    }
   } else {
     Value *Result;
     if (UseScalable)
@@ -17522,8 +17526,10 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
       Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
     // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
     for (unsigned I = 0; I < Factor; I++) {
-      Value *NewExtract = Builder.CreateExtractValue(Result, I);
-      DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
+      if (DeinterleavedValues[I]) {
+        Value *NewExtract = Builder.CreateExtractValue(Result, I);
+        DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
+      }
     }
   }
   return true;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..29ec0d3ffc81f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -218,8 +218,9 @@ class AArch64TargetLowering : public TargetLowering {
   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                              unsigned Factor) const override;
 
-  bool lowerDeinterleaveIntrinsicToLoad(
-      LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
+  bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
+                                        ArrayRef<Value *> DeinterleaveValues,
+                                        unsigned Factor) const override;
 
   bool lowerInterleaveIntrinsicToStore(
       StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
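Both targets now follow the same two-step contract: take the per-field vector type from the first non-null entry, then rewrite only the live fields of the wide ldN/segment load, skipping the nullptr gaps. A hypothetical factor-4 sketch with a single live field, mirroring the RISC-V tests below (here DeinterleavedValues is {nullptr, nullptr, %f2, nullptr}, so the guarded loops above replace only index 2):

    %vec = load <vscale x 32 x i8>, ptr %p
    %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
    ; Fields 0, 1 and 3 are never extracted; no extractvalue is created for them.
    %f2 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 2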
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5126ab6c31c28..b5e2a58997664 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24255,15 +24255,16 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
 }
 
 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
-    LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
-  unsigned Factor = DeinterleaveValues.size();
+    LoadInst *LI, ArrayRef<Value *> DeinterleaveValues, unsigned Factor) const {
   if (Factor > 8)
     return false;
 
   assert(LI->isSimple());
   IRBuilder<> Builder(LI);
 
-  auto *ResVTy = cast<VectorType>(DeinterleaveValues[0]->getType());
+  Value *FirstActive =
+      *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
+  VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
 
   const DataLayout &DL = LI->getDataLayout();
 
@@ -24314,6 +24315,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
   }
 
   for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
+    if (!DIV)
+      continue;
     // We have to create a brand new ExtractValue to replace each
     // of these old ExtractValue instructions.
     Value *NewEV =
@@ -24437,17 +24440,15 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
 /// dealing with factor of 2 (extractvalue is still required for most of other
 /// factors though).
 bool RISCVTargetLowering::lowerInterleavedVPLoad(
-    VPIntrinsic *Load, Value *Mask,
-    ArrayRef<Value *> DeinterleaveResults) const {
+    VPIntrinsic *Load, Value *Mask, ArrayRef<Value *> DeinterleaveResults,
+    unsigned Factor) const {
   assert(Mask && "Expect a valid mask");
   assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
          "Unexpected intrinsic");
 
-  const unsigned Factor = DeinterleaveResults.size();
-
-  auto *VTy = dyn_cast<VectorType>(DeinterleaveResults[0]->getType());
-  if (!VTy)
-    return false;
+  Value *FirstActive = *llvm::find_if(DeinterleaveResults,
+                                      [](Value *V) { return V != nullptr; });
+  VectorType *VTy = cast<VectorType>(FirstActive->getType());
 
   auto &DL = Load->getModule()->getDataLayout();
   Align Alignment = Load->getParamAlign(0).value_or(
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f67d7f155c9d0..a1b283e35074a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -437,14 +437,16 @@ class RISCVTargetLowering : public TargetLowering {
   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                              unsigned Factor) const override;
 
-  bool lowerDeinterleaveIntrinsicToLoad(
-      LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
+  bool lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
+                                        ArrayRef<Value *> DeinterleaveValues,
+                                        unsigned Factor) const override;
 
   bool lowerInterleaveIntrinsicToStore(
       StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
 
   bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
-                              ArrayRef<Value *> DeinterleaveRes) const override;
+                              ArrayRef<Value *> DeinterleaveRes,
+                              unsigned Factor) const override;
 
   bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
                                ArrayRef<Value *> InterleaveOps) const override;
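For the vp.load path, getMask still has to derive a single per-field mask from the wide mask operand, and lowerInterleavedVPLoad now takes the element type from the first live value rather than from index 0. A sketch of the factor-2 masked pattern this path accepts (assumed shape: the wide mask is an interleave of one per-field mask and the wide EVL is a multiple of the factor, per the isMultipleOfN check; names are illustrative):

    %wide.mask = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %m, <vscale x 16 x i1> %m)
    %wide.evl = mul i32 %evl, 2
    %vec = call <vscale x 32 x i8> @llvm.vp.load.nxv32i8.p0(ptr %p, <vscale x 32 x i1> %wide.mask, i32 %wide.evl)
    %di = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
    ; With this patch, extracting a single field is enough to form a segment load.
    %f0 = extractvalue {<vscale x 16 x i8>, <vscale x 16 x i8>} %di, 0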
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index a353e8e4398fa..9af92aa995f1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -52,9 +52,8 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_load_nxv16i
 define <vscale x 16 x i8> @vector_deinterleave_load_nxv16i8_nxv32i8_oneactive(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_nxv16i8_nxv32i8_oneactive:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4r.v v12, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vlseg2e8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
@@ -65,9 +64,8 @@ define <vscale x 16 x i8> @vector_deinterleave_load_nxv16i8_nxv32i8_oneactive(pt
 define <vscale x 16 x i8> @vector_deinterleave_load_nxv16i8_nxv32i8_oneactive2(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_nxv16i8_nxv32i8_oneactive2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl4r.v v12, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT:    vnsrl.wi v8, v12, 8
+; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vlseg2e8.v v6, (a0)
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
@@ -409,23 +407,8 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8
 define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT:    vl4r.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v8, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -436,23 +419,8 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
 define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT:    vl4r.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v5, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -463,23 +431,8 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
 define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor4_twoactive(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4_twoactive:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT:    vl4r.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v8, (a0)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    .cfi_def_cfa sp, 16
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)