@@ -16830,12 +16830,57 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
   Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
 }
 
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+                             KnownBits &Known, const APInt &DemandedElts,
+                             unsigned BFEWidth, bool SExt) {
+  const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  unsigned Src1Cst = 0;
+  if (Src1.isImm())
+    Src1Cst = Src1.getImm();
+  else if (Src1.isReg()) {
+    auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+    if (!Cst)
+      return;
+    Src1Cst = Cst->Value.getZExtValue();
+  } else
+    return;
+
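+  // Src1 packs the bitfield descriptor: the offset lives in its low six
+  // bits and the field width in bits [21:16].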
+  const unsigned Mask = maskTrailingOnes<unsigned>(6);
+  const unsigned Offset = Src1Cst & Mask;
+  const unsigned Width = (Src1Cst >> 16) & Mask;
+
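+  // Start from the known bits of the data operand (Src0).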
+  VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
+
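+  // The extract shifts Src0's bits [Offset + Width - 1 : Offset] down to
+  // bit zero, so shift the known bits down as well and clamp them to the
+  // extracted field.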
+  const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
+  Known.Zero = Known.Zero.lshr(Offset) & WidthMask;
+  Known.One = Known.One.lshr(Offset) & WidthMask;
+
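+  // Bits above the field are copies of its sign bit for the signed forms
+  // and known zero for the unsigned ones.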
+  if (SExt)
+    Known = Known.sextInReg(Width);
+  else
+    Known.Zero.setHighBits(BFEWidth - Width);
+}
+
 void SITargetLowering::computeKnownBitsForTargetInstr(
     GISelValueTracking &VT, Register R, KnownBits &Known,
     const APInt &DemandedElts, const MachineRegisterInfo &MRI,
     unsigned Depth) const {
   const MachineInstr *MI = MRI.getVRegDef(R);
   switch (MI->getOpcode()) {
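+  // Scalar bitfield extracts: derive known bits from the packed
+  // offset/width operand (see knownBitsForSBFE above).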
+  case AMDGPU::S_BFE_I32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/32,
+                            /*SExt=*/true);
+  case AMDGPU::S_BFE_U32:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/32,
+                            /*SExt=*/false);
+  case AMDGPU::S_BFE_I64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/64,
+                            /*SExt=*/true);
+  case AMDGPU::S_BFE_U64:
+    return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*BFEWidth=*/64,
+                            /*SExt=*/false);
   case AMDGPU::G_INTRINSIC:
   case AMDGPU::G_INTRINSIC_CONVERGENT: {
     Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();