diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bb1de58e04fbc..fb7c1bf91f3ec 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16830,12 +16830,63 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
   Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
 }
 
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+                             KnownBits &Known, const APInt &DemandedElts,
+                             unsigned BFEWidth, bool SExt, unsigned Depth) {
+  const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  unsigned Src1Cst = 0;
+  if (Src1.isImm()) {
+    Src1Cst = Src1.getImm();
+  } else if (Src1.isReg()) {
+    auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+    if (!Cst)
+      return;
+    Src1Cst = Cst->Value.getZExtValue();
+  } else {
+    return;
+  }
+
+  // Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit.
+  // Width is always [22:16].
+  const unsigned Offset =
+      Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6);
+  const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(6);
+
+  if (Width >= BFEWidth) // Ill-formed.
+    return;
+
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+                          Depth + 1);
+
+  Known = Known.extractBits(Width, Offset);
+
+  if (SExt)
+    Known = Known.sext(BFEWidth);
+  else
+    Known = Known.zext(BFEWidth);
+}
+
 void SITargetLowering::computeKnownBitsForTargetInstr(
     GISelValueTracking &VT, Register R, KnownBits &Known,
     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
     unsigned Depth) const {
+  Known.resetAll();
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
+  case AMDGPU::S_BFE_I32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+                            /*SExt=*/true, Depth);
+  case AMDGPU::S_BFE_U32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+                            /*SExt=*/false, Depth);
+  case AMDGPU::S_BFE_I64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+                            /*SExt=*/true, Depth);
+  case AMDGPU::S_BFE_U64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+                            /*SExt=*/false, Depth);
   case AMDGPU::G_INTRINSIC:
   case AMDGPU::G_INTRINSIC_CONVERGENT: {
     Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
new file mode 100644
index 0000000000000..7a6e07ddf2290
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
@@ -0,0 +1,253 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print<gisel-value-tracking>' %s -filetype=null 2>&1 | FileCheck %s
+---
+name: test_s_bfe_u32_constants
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_u32_constants
+    ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_constants
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_i32_constants
+    ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_u64_constants
+    ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+    %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_i64_constants
+    ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+    %cst:sgpr_64(s64) = G_CONSTANT i64 65535
+    %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_middle_bits_unknown
+body: |
+  bb.0:
+    ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown
+    ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+    ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+    ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+    ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+    ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24
+    %input:sgpr_32(s32) = COPY $sgpr0
+    %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+    %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+    %masked_input:sgpr_32(s32) = G_AND %input, %mask
+    %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+    %bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_middle_bits_unknown
+body: |
+  bb.0:
+    ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown
+    ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1
+    ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16
+    ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18
+    ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18
+    ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26
+    %input:sgpr_32(s32) = COPY $sgpr0
+    %cst:sgpr_32(s32) = G_CONSTANT i32 50175
+    %mask:sgpr_32(s32) = G_CONSTANT i32 15360
+    %masked_input:sgpr_32(s32) = G_AND %input, %mask
+    %merged:sgpr_32(s32) = G_OR %masked_input, %cst
+    %bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_middle_bits_unknown
+body: |
+  bb.0:
+    ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown
+    ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+    ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+    ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+    ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+    ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56
+    %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+    %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+    %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+    %masked_input:sgpr_64(s64) = G_AND %input, %mask
+    %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+    %bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_middle_bits_unknown
+body: |
+  bb.0:
+    ; Extract [8:16) but the middle 4 bits are ????
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown
+    ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1
+    ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48
+    ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50
+    ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50
+    ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58
+    %input:sgpr_64(s64) = COPY $sgpr0_sgpr1
+    %cst:sgpr_64(s64) = G_CONSTANT i64 50175
+    %mask:sgpr_64(s64) = G_CONSTANT i64 15360
+    %masked_input:sgpr_64(s64) = G_AND %input, %mask
+    %merged:sgpr_64(s64) = G_OR %masked_input, %cst
+    %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants
+    ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+    ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+    %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+    %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants
+    ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+    %src1:sgpr_32(s32) = G_CONSTANT i32 262156
+    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_g_constants_lookthrough
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough
+    ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+    ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+    ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %src0:sgpr_32(s32) = G_CONSTANT i32 65535
+    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+    %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_g_constants_lookthrough
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough
+    ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
+    ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
+    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
+    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
+    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
+    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_u32_trash_bits
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; Check that the 6th bit is ignored for u32. The lower 6 bits are
+    ; 101100 but we should mask out the first 1 for the 32 bit version.
+    ; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits
+    ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_i32_trash_bits
+body: |
+  bb.0:
+    ; Extract [12:16)
+    ; Check that the 6th bit is ignored for i32. The lower 6 bits are
+    ; 101100 but we should mask out the first 1 for the 32 bit version.
+    ; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits
+    ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32
+    %cst:sgpr_32(s32) = G_CONSTANT i32 65535
+    %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc
+    $sgpr0 = COPY %bfe
+...
+---
+name: test_s_bfe_u64_constants_sixth_bit
+body: |
+  bb.0:
+    ; Extract [32:48)
+    ; Check we correctly read 6 bits for the width on 64 bit BFEs.
+    ; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit
+    ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
+    %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+    %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
+---
+name: test_s_bfe_i64_constants_sixth_bit
+body: |
+  bb.0:
+    ; Extract [32:48)
+    ; Check we correctly read 6 bits for the width on 64 bit BFEs.
+    ; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit
+    ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16
+    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64
+    %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360
+    %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc
+    $sgpr0_sgpr1 = COPY %bfe
+...
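
For readers checking the test immediates by hand: S_BFE packs its control word into src1, with the bit offset in src1[4:0] (src1[5:0] for the 64-bit forms) and the field width in src1[22:16]; `knownBitsForSBFE` extracts that field from the known bits of src0 and then sign- or zero-extends it back to 32 or 64 bits. The standalone C++ sketch below mirrors that decoding with plain integers, outside of LLVM, so the constants used in the tests can be reproduced; it is illustrative only, and `decodeBFEControl`/`applyBFE` are made-up names, not part of the patch.

```cpp
// Standalone sketch (not part of the patch): decode an S_BFE src1 control
// word and apply the extract to a fully-known source value.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct BFEControl {
  unsigned Offset; // lowest bit of the extracted field
  unsigned Width;  // number of bits extracted
};

// BFEWidth is 32 for S_BFE_{U,I}32 and 64 for S_BFE_{U,I}64. The offset comes
// from src1[4:0] (32-bit) or src1[5:0] (64-bit), the width from src1[22:16],
// matching the masks used by knownBitsForSBFE.
BFEControl decodeBFEControl(uint32_t Src1Cst, unsigned BFEWidth) {
  unsigned OffsetMask = (BFEWidth == 32) ? 0x1f : 0x3f;
  return {Src1Cst & OffsetMask, (Src1Cst >> 16) & 0x3f};
}

// Mirrors extractBits followed by sext/zext for a constant input.
uint64_t applyBFE(uint64_t Src0, BFEControl C, unsigned BFEWidth, bool SExt) {
  assert(C.Width < BFEWidth && "ill-formed BFE control word");
  if (C.Width == 0)
    return 0;
  uint64_t FieldMask = (1ull << C.Width) - 1;
  uint64_t Field = (Src0 >> C.Offset) & FieldMask;
  if (SExt && ((Field >> (C.Width - 1)) & 1))
    Field |= ~FieldMask; // sign-extend the extracted field
  return BFEWidth == 64 ? Field : (Field & 0xffffffffull);
}

int main() {
  // 262156 = 0x4000C -> offset 12, width 4: the "Extract [12:16)" tests.
  BFEControl C32 = decodeBFEControl(262156, 32);
  printf("offset=%u width=%u\n", C32.Offset, C32.Width);
  printf("S_BFE_U32 0xffff -> %#llx\n",
         (unsigned long long)applyBFE(0xffff, C32, 32, /*SExt=*/false)); // 0xf
  printf("S_BFE_I32 0xffff -> %#llx\n",
         (unsigned long long)applyBFE(0xffff, C32, 32, /*SExt=*/true)); // 0xffffffff

  // 1048608 = 0x100020 -> offset 32, width 16: the 64-bit "sixth bit" tests.
  BFEControl C64 = decodeBFEControl(1048608, 64);
  printf("offset=%u width=%u\n", C64.Offset, C64.Width);
  return 0;
}
```

With the same decoding, 524296 (0x80008) yields offset 8 and width 8 for the "middle bits unknown" tests, and 262252 (0x4006C) still yields offset 12 under the 5-bit offset mask of the 32-bit forms, which is what the trash-bits tests rely on.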