@@ -1492,88 +1492,73 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
1492
1492
Register WidthReg = MI.getOperand (FirstOpnd + 2 ).getReg ();
1493
1493
1494
1494
const RegisterBank *DstBank =
1495
- OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1496
- if (DstBank == &AMDGPU::VGPRRegBank) {
1497
- if (Ty == S32)
1498
- return true ;
1499
-
1500
- // There is no 64-bit vgpr bitfield extract instructions so the operation
1501
- // is expanded to a sequence of instructions that implement the operation.
1502
- ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::VGPRRegBank);
1495
+ OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1503
1496
1504
- const LLT S64 = LLT::scalar (64 );
1505
- // Shift the source operand so that extracted bits start at bit 0.
1506
- auto ShiftOffset = Signed ? B.buildAShr (S64, SrcReg, OffsetReg)
1507
- : B.buildLShr (S64, SrcReg, OffsetReg);
1508
- auto UnmergeSOffset = B.buildUnmerge ({S32, S32}, ShiftOffset);
1509
-
1510
- // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
1511
- // if the width is a constant.
1512
- if (auto ConstWidth = getIConstantVRegValWithLookThrough (WidthReg, MRI)) {
1513
- // Use the 32-bit bitfield extract instruction if the width is a constant.
1514
- // Depending on the width size, use either the low or high 32-bits.
1515
- auto Zero = B.buildConstant (S32, 0 );
1516
- auto WidthImm = ConstWidth->Value .getZExtValue ();
1517
- if (WidthImm <= 32 ) {
1518
- // Use bitfield extract on the lower 32-bit source, and then sign-extend
1519
- // or clear the upper 32-bits.
1520
- auto Extract =
1521
- Signed ? B.buildSbfx (S32, UnmergeSOffset.getReg (0 ), Zero, WidthReg)
1522
- : B.buildUbfx (S32, UnmergeSOffset.getReg (0 ), Zero, WidthReg);
1523
- auto Extend =
1524
- Signed ? B.buildAShr (S32, Extract, B.buildConstant (S32, 31 )) : Zero;
1525
- B.buildMergeLikeInstr (DstReg, {Extract, Extend});
1526
- } else {
1527
- // Use bitfield extract on upper 32-bit source, and combine with lower
1528
- // 32-bit source.
1529
- auto UpperWidth = B.buildConstant (S32, WidthImm - 32 );
1530
- auto Extract =
1531
- Signed
1532
- ? B.buildSbfx (S32, UnmergeSOffset.getReg (1 ), Zero, UpperWidth)
1533
- : B.buildUbfx (S32, UnmergeSOffset.getReg (1 ), Zero, UpperWidth);
1534
- B.buildMergeLikeInstr (DstReg, {UnmergeSOffset.getReg (0 ), Extract});
1535
- }
1536
- MI.eraseFromParent ();
1497
+ if (DstBank != &AMDGPU::VGPRRegBank) {
1498
+ // SGPR: Canonicalize to a G_S/UBFX
1499
+ if (!isa<GIntrinsic>(MI))
1537
1500
return true ;
1538
- }
1539
1501
1540
- // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
1541
- // operations.
1542
- auto ExtShift = B.buildSub (S32, B.buildConstant (S32, 64 ), WidthReg);
1543
- auto SignBit = B.buildShl (S64, ShiftOffset, ExtShift);
1502
+ ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::SGPRRegBank);
1544
1503
if (Signed)
1545
- B.buildAShr (S64, SignBit, ExtShift );
1504
+ B.buildSbfx (DstReg, SrcReg, OffsetReg, WidthReg );
1546
1505
else
1547
- B.buildLShr (S64, SignBit, ExtShift );
1506
+ B.buildUbfx (DstReg, SrcReg, OffsetReg, WidthReg );
1548
1507
MI.eraseFromParent ();
1549
1508
return true ;
1550
1509
}
1551
1510
1552
- // The scalar form packs the offset and width in a single operand.
1553
-
1554
- ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::SGPRRegBank);
1555
-
1556
- // Ensure the high bits are clear to insert the offset.
1557
- auto OffsetMask = B.buildConstant (S32, maskTrailingOnes<unsigned >(6 ));
1558
- auto ClampOffset = B.buildAnd (S32, OffsetReg, OffsetMask);
1559
-
1560
- // Zeros out the low bits, so don't bother clamping the input value.
1561
- auto ShiftWidth = B.buildShl (S32, WidthReg, B.buildConstant (S32, 16 ));
1562
-
1563
- // Transformation function, pack the offset and width of a BFE into
1564
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1565
- // source, bits [5:0] contain the offset and bits [22:16] the width.
1566
- auto MergedInputs = B.buildOr (S32, ClampOffset, ShiftWidth);
1511
+ // VGPR
1512
+ if (Ty == S32)
1513
+ return true ;
1567
1514
1568
- // TODO: It might be worth using a pseudo here to avoid scc clobber and
1569
- // register class constraints.
1570
- unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1571
- (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1515
+ // There are no 64-bit VGPR bitfield extract instructions, so the operation
1516
+ // is expanded to a sequence of instructions that implement the operation.
1517
+ ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::VGPRRegBank);
1572
1518
1573
- auto MIB = B.buildInstr (Opc, {DstReg}, {SrcReg, MergedInputs});
1574
- if (!constrainSelectedInstRegOperands (*MIB, *TII, *TRI, *this ))
1575
- llvm_unreachable (" failed to constrain BFE" );
1519
+ const LLT S64 = LLT::scalar (64 );
1520
+ // Shift the source operand so that extracted bits start at bit 0.
1521
+ auto ShiftOffset = Signed ? B.buildAShr (S64, SrcReg, OffsetReg)
1522
+ : B.buildLShr (S64, SrcReg, OffsetReg);
1523
+ auto UnmergeSOffset = B.buildUnmerge ({S32, S32}, ShiftOffset);
1524
+
1525
+ // A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
1526
+ // if the width is a constant.
1527
+ if (auto ConstWidth = getIConstantVRegValWithLookThrough (WidthReg, MRI)) {
1528
+ // Use the 32-bit bitfield extract instruction if the width is a constant.
1529
+ // Depending on the width size, use either the low or high 32-bits.
1530
+ auto Zero = B.buildConstant (S32, 0 );
1531
+ auto WidthImm = ConstWidth->Value .getZExtValue ();
1532
+ if (WidthImm <= 32 ) {
1533
+ // Use bitfield extract on the lower 32-bit source, and then sign-extend
1534
+ // or clear the upper 32-bits.
1535
+ auto Extract =
1536
+ Signed ? B.buildSbfx (S32, UnmergeSOffset.getReg (0 ), Zero, WidthReg)
1537
+ : B.buildUbfx (S32, UnmergeSOffset.getReg (0 ), Zero, WidthReg);
1538
+ auto Extend =
1539
+ Signed ? B.buildAShr (S32, Extract, B.buildConstant (S32, 31 )) : Zero;
1540
+ B.buildMergeLikeInstr (DstReg, {Extract, Extend});
1541
+ } else {
1542
+ // Use bitfield extract on upper 32-bit source, and combine with lower
1543
+ // 32-bit source.
1544
+ auto UpperWidth = B.buildConstant (S32, WidthImm - 32 );
1545
+ auto Extract =
1546
+ Signed ? B.buildSbfx (S32, UnmergeSOffset.getReg (1 ), Zero, UpperWidth)
1547
+ : B.buildUbfx (S32, UnmergeSOffset.getReg (1 ), Zero, UpperWidth);
1548
+ B.buildMergeLikeInstr (DstReg, {UnmergeSOffset.getReg (0 ), Extract});
1549
+ }
1550
+ MI.eraseFromParent ();
1551
+ return true ;
1552
+ }
1576
1553
1554
+ // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
1555
+ // operations.
1556
+ auto ExtShift = B.buildSub (S32, B.buildConstant (S32, 64 ), WidthReg);
1557
+ auto SignBit = B.buildShl (S64, ShiftOffset, ExtShift);
1558
+ if (Signed)
1559
+ B.buildAShr (S64, SignBit, ExtShift);
1560
+ else
1561
+ B.buildLShr (S64, SignBit, ExtShift);
1577
1562
MI.eraseFromParent ();
1578
1563
return true ;
1579
1564
}
0 commit comments