@@ -16783,12 +16783,72 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
   Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
 }
 
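+/// Compute the known bits of an S_BFE_{I,U}{32,64} bitfield extract.
+/// \p BFEWidth is the operation width in bits and \p SExt is true for the
+/// signed (sign-extending) variants.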
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+                             KnownBits &Known, const APInt &DemandedElts,
+                             unsigned BFEWidth, bool SExt) {
+  const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  unsigned Src1Cst = 0;
+  if (Src1.isImm()) {
+    Src1Cst = Src1.getImm();
+  } else if (Src1.isReg()) {
+    auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+    if (!Cst)
+      return;
+    Src1Cst = Cst->Value.getZExtValue();
+  } else {
+    return;
+  }
+
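+  // Operand 2 packs both extract parameters: the bit offset in its low six
+  // bits and the field width in bits [21:16].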
+  const unsigned Mask = maskTrailingOnes<unsigned>(6);
+  const unsigned Offset = Src1Cst & Mask;
+  const unsigned Width = (Src1Cst >> 16) & Mask;
+
+  // Bail out conservatively on a zero width or a width wider than the
+  // operation itself; the encoding allows both.
+  if (Width == 0 || Width > BFEWidth)
+    return;
+
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
+
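+  // The extract reads Width bits of Src0 starting at Offset, so shift the
+  // source's known bits down and truncate them to the field.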
+  const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
+  Known.Zero = Known.Zero.lshr(Offset) & WidthMask;
+  Known.One = Known.One.lshr(Offset) & WidthMask;
+
+  // Extend the result: the signed variants replicate the field's top bit
+  // into the high bits, while the unsigned variants make them known zero.
+  if (SExt)
+    Known = Known.sextInReg(Width);
+  else
+    Known.Zero.setBitsFrom(Width);
+}
+
 void SITargetLowering::computeKnownBitsForTargetInstr(
     GISelValueTracking &VT, Register R, KnownBits &Known,
     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
     unsigned Depth) const {
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
+  case AMDGPU::S_BFE_I32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/32,
+                            /*SExt=*/true);
+  case AMDGPU::S_BFE_U32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/32,
+                            /*SExt=*/false);
+  case AMDGPU::S_BFE_I64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/64,
+                            /*SExt=*/true);
+  case AMDGPU::S_BFE_U64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/64,
+                            /*SExt=*/false);
   case AMDGPU::G_INTRINSIC:
   case AMDGPU::G_INTRINSIC_CONVERGENT: {
     Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();