From 25899056e593040f64e8b1736267da977121ed22 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 27 May 2025 10:26:24 +0200 Subject: [PATCH 1/7] [AMDGPU] Compute GISel KnownBits for S_BFE instructions --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index bb1de58e04fbc..d50313275db28 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16830,12 +16830,57 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST, Known.Zero.setHighBits(llvm::countl_zero(MaxValue)); } +static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, + KnownBits &Known, const APInt &DemandedElts, + unsigned BFEWidth, bool SExt) { + const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo(); + const MachineOperand &Src1 = MI.getOperand(2); + + unsigned Src1Cst = 0; + if (Src1.isImm()) + Src1Cst = Src1.getImm(); + else if (Src1.isReg()) { + auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI); + if (!Cst) + return; + Src1Cst = Cst->Value.getZExtValue(); + } else + return; + + const unsigned Mask = maskTrailingOnes(6); + const unsigned Offset = Src1Cst & Mask; + const unsigned Width = (Src1Cst >> 16) & Mask; + + VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts); + + const uint64_t WidthMask = maskTrailingOnes(Width); + Known.Zero = Known.Zero.shl(Offset) & WidthMask; + Known.One = Known.One.shl(Offset) & WidthMask; + + if (SExt) + Known.sextInReg(Width); + else + Known.Zero |= maskLeadingOnes(BFEWidth - Width); +} + void SITargetLowering::computeKnownBitsForTargetInstr( GISelValueTracking &VT, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth) const { const MachineInstr *MI = MRI.getVRegDef(R); switch (MI->getOpcode()) { + case AMDGPU::S_BFE_I32: + return 
knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32, + /*SExt=*/true); + case AMDGPU::S_BFE_U32: + return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32, + /*SExt=*/false); + case AMDGPU::S_BFE_I64: + return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64, + /*SExt=*/true); + case AMDGPU::S_BFE_U64: + return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64, + /*SExt=*/false); case AMDGPU::G_INTRINSIC: case AMDGPU::G_INTRINSIC_CONVERGENT: { Intrinsic::ID IID = cast(MI)->getIntrinsicID(); From 15b6ef207dedd1647f838bb57a430a2f4d466be4 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 27 May 2025 15:44:52 +0200 Subject: [PATCH 2/7] Fixes + add tests --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 +- .../AMDGPU/GlobalISel/known-bits-sbfe.mir | 139 ++++++++++++++++++ 2 files changed, 146 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d50313275db28..d184db5b03aba 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16853,20 +16853,22 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts); - const uint64_t WidthMask = maskTrailingOnes(Width); - Known.Zero = Known.Zero.shl(Offset) & WidthMask; - Known.One = Known.One.shl(Offset) & WidthMask; + Known.Zero = Known.Zero.lshr(Offset); + Known.One = Known.One.lshr(Offset); + + Known = Known.trunc(Width); if (SExt) - Known.sextInReg(Width); + Known = Known.sext(BFEWidth); else - Known.Zero |= maskLeadingOnes(BFEWidth - Width); + Known = Known.zext(BFEWidth); } void SITargetLowering::computeKnownBitsForTargetInstr( GISelValueTracking &VT, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth) const { + Known.resetAll(); const 
MachineInstr *MI = MRI.getVRegDef(R); switch (MI->getOpcode()) { case AMDGPU::S_BFE_I32: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir new file mode 100644 index 0000000000000..85f1f66c624d9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir @@ -0,0 +1,139 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes="print" %s -o /dev/null 2>&1 | FileCheck %s + +--- +name: test_s_bfe_u32_constants +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_u32_constants + ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28 + %cst:sgpr_32(s32) = G_CONSTANT i32 65535 + %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc + $sgpr0 = COPY %bfe +... +--- +name: test_s_bfe_i32_constants +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_i32_constants + ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 + %cst:sgpr_32(s32) = G_CONSTANT i32 65535 + %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc + $sgpr0 = COPY %bfe +... +--- +name: test_s_bfe_u64_constants +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_u64_constants + ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60 + %cst:sgpr_64(s64) = G_CONSTANT i64 65535 + %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... 
+--- +name: test_s_bfe_i64_constants +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_i64_constants + ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64 + %cst:sgpr_64(s64) = G_CONSTANT i64 65535 + %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... +--- +name: test_s_bfe_u32_middle_bits_unknown +body: | + bb.0: + ; Extract [8:16) but the middle 4 bits are ???? + liveins: $sgpr0 + + ; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown + ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1 + ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16 + ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18 + ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18 + ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24 + %input:sgpr_32(s32) = COPY $sgpr0 + %cst:sgpr_32(s32) = G_CONSTANT i32 50175 + %mask:sgpr_32(s32) = G_CONSTANT i32 15360 + %masked_input:sgpr_32(s32) = G_AND %input, %mask + %merged:sgpr_32(s32) = G_OR %masked_input, %cst + %bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc + $sgpr0 = COPY %bfe +... +--- +name: test_s_bfe_i32_middle_bits_unknown +body: | + bb.0: + ; Extract [8:16) but the middle 4 bits are ???? + liveins: $sgpr0 + + ; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown + ; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? 
SignBits:1 + ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16 + ; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18 + ; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18 + ; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26 + %input:sgpr_32(s32) = COPY $sgpr0 + %cst:sgpr_32(s32) = G_CONSTANT i32 50175 + %mask:sgpr_32(s32) = G_CONSTANT i32 15360 + %masked_input:sgpr_32(s32) = G_AND %input, %mask + %merged:sgpr_32(s32) = G_OR %masked_input, %cst + %bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc + $sgpr0 = COPY %bfe +... +--- +name: test_s_bfe_u64_middle_bits_unknown +body: | + bb.0: + ; Extract [8:16) but the middle 4 bits are ???? + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown + ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48 + ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50 + ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50 + ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56 + %input:sgpr_64(s64) = COPY $sgpr0_sgpr1 + %cst:sgpr_64(s64) = G_CONSTANT i64 50175 + %mask:sgpr_64(s64) = G_CONSTANT i64 15360 + %masked_input:sgpr_64(s64) = G_AND %input, %mask + %merged:sgpr_64(s64) = G_OR %masked_input, %cst + %bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... 
+--- +name: test_s_bfe_i64_middle_bits_unknown +body: | + bb.0: + ; Extract [8:16) but the middle 4 bits are ???? + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown + ; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 + ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48 + ; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50 + ; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50 + ; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58 + %input:sgpr_64(s64) = COPY $sgpr0_sgpr1 + %cst:sgpr_64(s64) = G_CONSTANT i64 50175 + %mask:sgpr_64(s64) = G_CONSTANT i64 15360 + %masked_input:sgpr_64(s64) = G_AND %input, %mask + %merged:sgpr_64(s64) = G_OR %masked_input, %cst + %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... 
From 74b1ed3420b8cce25dc4b57b3c8d5e139bdf2507 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Wed, 28 May 2025 10:31:04 +0200 Subject: [PATCH 3/7] Address comment and add more tests --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 ++- .../AMDGPU/GlobalISel/known-bits-sbfe.mir | 116 +++++++++++++++++- 2 files changed, 124 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d184db5b03aba..57eceb26b145e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16837,19 +16837,22 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, const MachineOperand &Src1 = MI.getOperand(2); unsigned Src1Cst = 0; - if (Src1.isImm()) + if (Src1.isImm()) { Src1Cst = Src1.getImm(); - else if (Src1.isReg()) { + } else if (Src1.isReg()) { auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI); if (!Cst) return; Src1Cst = Cst->Value.getZExtValue(); - } else + } else { return; + } - const unsigned Mask = maskTrailingOnes(6); - const unsigned Offset = Src1Cst & Mask; - const unsigned Width = (Src1Cst >> 16) & Mask; + // Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit. + // Width is always [22:16]. + const unsigned Offset = + Src1Cst & maskTrailingOnes((BFEWidth == 32) ? 
5 : 6); + const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes(6); VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir index 85f1f66c624d9..7a6e07ddf2290 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes="print" %s -o /dev/null 2>&1 | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print' %s -filetype=null 2>&1 | FileCheck %s --- name: test_s_bfe_u32_constants @@ -137,3 +137,117 @@ body: | %bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc $sgpr0_sgpr1 = COPY %bfe ... +--- +name: test_s_bfe_i32_g_constants +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants + ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 + ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 + %src0:sgpr_32(s32) = G_CONSTANT i32 65535 + %src1:sgpr_32(s32) = G_CONSTANT i32 262156 + %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc + $sgpr0 = COPY %bfe +... 
+--- +name: test_s_bfe_u64_g_constants +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants + ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 + ; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60 + %src0:sgpr_64(s64) = G_CONSTANT i64 65535 + %src1:sgpr_32(s32) = G_CONSTANT i32 262156 + %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... +--- +name: test_s_bfe_i32_g_constants_lookthrough +body: | + bb.0: + ; Extract [12:16) + ; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough + ; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 + ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5 + ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 + %src0:sgpr_32(s32) = G_CONSTANT i32 65535 + %src1:sgpr_32(s24) = G_CONSTANT i24 262156 + %src1_ext:sgpr_32(s32) = G_ZEXT %src1 + %bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc + $sgpr0 = COPY %bfe +... 
+--- +name: test_s_bfe_u64_g_constants_lookthrough +body: | + bb.0: + ; Extract [12:16); src1 is a constant reached through a G_ZEXT. + ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough + ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 + ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5 + ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60 + %src0:sgpr_64(s64) = G_CONSTANT i64 65535 + %src1:sgpr_32(s24) = G_CONSTANT i24 262156 + %src1_ext:sgpr_32(s32) = G_ZEXT %src1 + %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1_ext, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... +--- +name: test_s_bfe_u32_trash_bits +body: | + bb.0: + ; Extract [12:16) + ; Check that the 6th bit is ignored for u32. The lower 6 bits are + ; 101100 but we should mask out the first 1 for the 32 bit version. + ; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits + ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28 + %cst:sgpr_32(s32) = G_CONSTANT i32 65535 + %bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc + $sgpr0 = COPY %bfe +... +--- +name: test_s_bfe_i32_trash_bits +body: | + bb.0: + ; Extract [12:16) + ; Check that the 6th bit is ignored for i32. The lower 6 bits are + ; 101100 but we should mask out the first 1 for the 32 bit version. + ; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits + ; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 + %cst:sgpr_32(s32) = G_CONSTANT i32 65535 + %bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc + $sgpr0 = COPY %bfe +... 
+--- +name: test_s_bfe_u64_constants_sixth_bit +body: | + bb.0: + ; Extract [32:48) + ; Check we correctly read 6 bits for the offset on 64 bit BFEs. + ; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit + ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 + %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360 + %bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... +--- +name: test_s_bfe_i64_constants_sixth_bit +body: | + bb.0: + ; Extract [32:48) + ; Check we correctly read 6 bits for the offset on 64 bit BFEs. + ; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit + ; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16 + ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64 + %cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360 + %bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc + $sgpr0_sgpr1 = COPY %bfe +... 
From ae15e5b15109c9e4050b786a3852b39bbf1bce6e Mon Sep 17 00:00:00 2001 From: pvanhout Date: Wed, 28 May 2025 11:13:54 +0200 Subject: [PATCH 4/7] Propagate depth correctly --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 57eceb26b145e..f93f9a181072e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16832,7 +16832,7 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST, static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, KnownBits &Known, const APInt &DemandedElts, - unsigned BFEWidth, bool SExt) { + unsigned BFEWidth, bool SExt, unsigned Depth) { const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo(); const MachineOperand &Src1 = MI.getOperand(2); @@ -16854,7 +16854,8 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, Src1Cst & maskTrailingOnes((BFEWidth == 32) ? 
5 : 6); const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes(6); - VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts); + VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); Known.Zero = Known.Zero.lshr(Offset); Known.One = Known.One.lshr(Offset); @@ -16876,16 +16877,16 @@ void SITargetLowering::computeKnownBitsForTargetInstr( switch (MI->getOpcode()) { case AMDGPU::S_BFE_I32: return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32, - /*SExt=*/true); + /*SExt=*/true, Depth); case AMDGPU::S_BFE_U32: return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32, - /*SExt=*/false); + /*SExt=*/false, Depth); case AMDGPU::S_BFE_I64: return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64, - /*SExt=*/true); + /*SExt=*/true, Depth); case AMDGPU::S_BFE_U64: return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64, - /*SExt=*/false); + /*SExt=*/false, Depth); case AMDGPU::G_INTRINSIC: case AMDGPU::G_INTRINSIC_CONVERGENT: { Intrinsic::ID IID = cast(MI)->getIntrinsicID(); From b1c8465ef31f4bc72ea8a0b52ede0177e3318d11 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Wed, 28 May 2025 13:10:39 +0200 Subject: [PATCH 5/7] add assert --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f93f9a181072e..805ba6c2eba64 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16854,6 +16854,11 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, Src1Cst & maskTrailingOnes((BFEWidth == 32) ? 
5 : 6); const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes(6); + if (Width >= BFEWidth) { + assert(false && "Invalid S_BFE"); + return; + } + VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); From f5a5891e720be1d2873e76b1918f9453bdfcd154 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 24 Jun 2025 09:21:32 +0200 Subject: [PATCH 6/7] Change assert --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 805ba6c2eba64..e46c144272636 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16854,10 +16854,8 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, Src1Cst & maskTrailingOnes((BFEWidth == 32) ? 5 : 6); const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes(6); - if (Width >= BFEWidth) { - assert(false && "Invalid S_BFE"); + if (Width >= BFEWidth) // Ill-formed. 
return; - } VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); From f5f905df4db852ca35e6c7c75c07a288d4823701 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Thu, 26 Jun 2025 10:05:19 +0200 Subject: [PATCH 7/7] comment --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index e46c144272636..fb7c1bf91f3ec 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16860,10 +16860,7 @@ static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); - Known.Zero = Known.Zero.lshr(Offset); - Known.One = Known.One.lshr(Offset); - - Known = Known.trunc(Width); + Known = Known.extractBits(Width, Offset); if (SExt) Known = Known.sext(BFEWidth);