diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index e3f6eda8ff065..417e56a908e6b 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -112,6 +112,17 @@ static cl::opt SinkIntoCycleLimit( "The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); +static cl::opt + SinkMaxFanOut("machine-sink-max-fanout", + cl::desc("The maximum fan-out count for a def to consider it " + "for sinking into a cycle"), + cl::init(2), cl::Hidden); + +static cl::opt CloneUsesThreshold( + "machine-sink-clone-uses-threshold", + cl::desc("Maximum number of uses for a sink candidate to be cloned."), + cl::init(8), cl::Hidden); + STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); @@ -216,6 +227,16 @@ class MachineSinking { } private: + // Represents a use-def chain of instructions that can be sunk together. + struct Chain { + SmallVector Instrs; + SmallSet LiveIns; + SmallSet Defs; + SmallVector PSetDiff; + unsigned TotalLatency; + unsigned NumUses; + }; + bool ProcessBlock(MachineBasicBlock &MBB); void ProcessDbgInst(MachineInstr &MI); bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, @@ -260,9 +281,19 @@ class MachineSinking { void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates); - bool - aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I, - DenseMap &SunkInstrs); + MachineInstr *getEarliestSinkPoint(MachineInstr *MI1, MachineInstr *MI2); + void FilterChains(const MachineCycle *Cycle, std::vector &Chains); + void SortIntoUseDefChains(SmallVectorImpl &Instrs, + std::vector &Chains, + const DenseSet &CycleLiveIns); + bool SinkChainIntoCycle(MachineCycle *Cycle, Chain &C, + std::vector &OverMaxSetPressure); + void CalculateMaxSetPressures(const MachineBasicBlock *MBB, + const DenseSet &CycleLiveIns, + std::vector &MaxSetPressure); + void CalculateMaxSetPressures(const MachineCycle *Cycle, + const DenseSet &CycleLiveIns, + std::vector &MaxSetPressure); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -284,8 +315,6 @@ class MachineSinking { bool registerPressureSetExceedsLimit(unsigned NRegs, const TargetRegisterClass *RC, const MachineBasicBlock &MBB); - - bool registerPressureExceedsLimit(const MachineBasicBlock &MBB); }; class MachineSinkingLegacy : public MachineFunctionPass { @@ -715,7 +744,14 @@ void MachineSinking::FindCycleSinkCandidates( MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { + if (MI.isDebugInstr()) + continue; LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); + // TODO: support instructions with multiple defs + if (MI.getNumDefs() > 1) { + LLVM_DEBUG(dbgs() << "CycleSink: not sinking multi-def instruction\n"); + continue; + } if (MI.isMetaInstruction()) { LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n"); continue; @@ -741,10 +777,10 @@ void MachineSinking::FindCycleSinkCandidates( if (MI.isConvergent()) continue; - const MachineOperand &MO = MI.getOperand(0); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + const MachineOperand &DefMO = MI.getOperand(0); + if (!DefMO.isReg() || !DefMO.getReg() || !DefMO.isDef()) continue; - if (!MRI->hasOneDef(MO.getReg())) + if (!MRI->hasOneDef(DefMO.getReg())) continue; LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); @@ -752,6 +788,282 @@ void MachineSinking::FindCycleSinkCandidates( } } +void MachineSinking::SortIntoUseDefChains( + SmallVectorImpl &Instrs, std::vector &UserChains, + const DenseSet &CycleLiveIns) { + DenseMap DefToInstr; + DenseMap UseCounts; + SmallSet SkipMI; + + // Do not consider defs with high fan-out count, I.E a def with many uses in + // the chain. Unrestricting this can lead to huge chains. This is bad for two + // reasons. Firstly, one node with a high use-count is likely to have a large + // liverange, so sinking it is less beneficial. Secondly, if there is one + // instruction in a chain that cannot be sunk, then this prevents the entire + // chain to be sunk. Splitting it up allows more granularity to both avoid + // this and be better for SinkIntoCycleLimit. + for (MachineInstr *MI : Instrs) { + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (MO.isDef()) { + DefToInstr[Reg] = MI; + } else if (MO.isUse()) { + UseCounts[Reg]++; + if (UseCounts[Reg] > SinkMaxFanOut) { + LLVM_DEBUG(dbgs() << "CycleSink: Not considering sinking " << *MI + << ". Uses exceeds max fanout.\n"); + SkipMI.insert(DefToInstr[Reg]); + } + } + } + } + } + + DenseMap RegToChain; + DenseMap InstrToChain; + SmallVector Chains; + // Process each instruction, placing it into a chain based on defs/uses. + unsigned NextChainID = 0; + for (MachineInstr *MI : Instrs) { + if (SkipMI.contains(MI)) + continue; + SmallSet RelatedChains; + SmallSet LiveIns; + + // Check uses to see if this instruction interacts with a chain. + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + + // Skip physical regs as they are known to be cycle invariant. + if (!Reg.isVirtual()) + continue; + + auto It = RegToChain.find(Reg); + if (It != RegToChain.end()) + RelatedChains.insert(It->second); + else if (MO.isUse() && !CycleLiveIns.contains(Reg)) + LiveIns.insert(Reg); + } + + unsigned ChainID; + if (RelatedChains.empty()) { + // No related chain — create a new one. + ChainID = NextChainID++; + Chains.resize(NextChainID); + } else if (RelatedChains.size() == 1) { + // Single related chain — join it. + ChainID = *RelatedChains.begin(); + } else { + // Multiple related chains — merge together. + ChainID = *RelatedChains.begin(); + Chain &MainChain = Chains[ChainID]; + + for (auto It = ++RelatedChains.begin(); It != RelatedChains.end(); ++It) { + Chain &Other = Chains[*It]; + MainChain.Instrs.append(Other.Instrs.begin(), Other.Instrs.end()); + for (Register R : Other.LiveIns) + MainChain.LiveIns.insert(R); + + // Update mappings + for (MachineInstr *MovedMI : Other.Instrs) + InstrToChain[MovedMI] = ChainID; + + for (const MachineInstr *MovedMI : Other.Instrs) { + for (const MachineOperand &MO : MovedMI->operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg && RegToChain.contains(Reg)) + RegToChain[Reg] = ChainID; + } + } + Other.Instrs.clear(); + Other.LiveIns.clear(); + } + } + + // Place this instruction into the chosen chain. + Chain &C = Chains[ChainID]; + C.Instrs.push_back(MI); + for (Register R : LiveIns) + C.LiveIns.insert(R); + InstrToChain[MI] = ChainID; + + // Update reg -> chain mappings for all defs. + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.isDef()) + RegToChain[MO.getReg()] = ChainID; + } + } + + // Now calculate defs of each chain by traversing our list of instructions + // bottom-up, keeping a list of visited so that we don't count them as an + // external use. + for (Chain &C : Chains) { + SmallPtrSet VisitedMI; + unsigned TotalLatency = 0; + unsigned NumUses = 0; + for (MachineInstr *MI : llvm::reverse(C.Instrs)) { + TotalLatency += SchedModel.computeInstrLatency(MI, true); + VisitedMI.insert(MI); + for (MachineOperand &MO : MI->defs()) { + for (MachineInstr &UseMI : MRI->use_instructions(MO.getReg())) { + if (VisitedMI.count(&UseMI)) + continue; + NumUses++; + C.Defs.insert(MO.getReg()); + } + } + } + C.TotalLatency = TotalLatency; + C.NumUses = NumUses; + // No def chains can exist due to earlier passes not cleaning up after + // themselves. Ignore them as they will be cleaned up later. + if (C.Defs.empty()) + C.Instrs.clear(); + } + + // Copy all our working chains into the user-supplied list + for (Chain &C : Chains) + if (!C.Instrs.empty()) + UserChains.push_back(C); +} + +/// Helper function to calculate cycle liveins. +static void calculateCycleLiveIns(const MachineCycle *Cycle, + DenseSet &CycleLiveIns) { + DenseSet Defs; + + // Collect defs. + for (const MachineBasicBlock *MBB : Cycle->blocks()) { + for (const MachineInstr &MI : *MBB) { + if (MI.isDebugInstr()) + continue; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && + Register::isVirtualRegister(MO.getReg())) + Defs.insert(MO.getReg()); + } + } + } + + // A register that is used but never defined in the cycle is considered a + // live-in. + for (const MachineBasicBlock *MBB : Cycle->blocks()) { + for (const MachineInstr &MI : *MBB) { + if (MI.isDebugInstr()) + continue; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isUse() && + Register::isVirtualRegister(MO.getReg())) { + Register Reg = MO.getReg(); + if (!Defs.count(Reg)) { + CycleLiveIns.insert(Reg); + } + } + } + } + } +} + +/// If sinking a chain or series of chains introduces fewer liveins than defs, +/// it is considered profitable. Remove any chains that are unprofitable. +void MachineSinking::FilterChains(const MachineCycle *Cycle, + std::vector &Chains) { + std::vector FilteredChains; + + // Record chain live-in frequency. + SmallDenseMap RegToLiveInCount; + for (Chain &C : Chains) { + for (const Register &LiveIn : C.LiveIns) { + RegToLiveInCount[LiveIn]++; + } + } + // Add a chain to the list of FilteredChains if it has more defs than new + // liveins. Sharing a livein with another chain is considered as not adding a + // new livein. + for (Chain &C : Chains) { + assert(C.Defs.size() > 0 && "Chain with no defs?"); + // Create a pressure set diff to see how sinking this chain would affect + // register pressure. + C.PSetDiff = SmallVector(TRI->getNumRegPressureSets(), 0); + unsigned NumNewLiveIns = 0; + for (const Register &LiveIn : C.LiveIns) { + // If exactly one chain has this register as a livein and it is not + // already live in the cycle, consider this as adding a new livein if + // sunk. + if (RegToLiveInCount[LiveIn] == 1) { + NumNewLiveIns++; + // Increase register pressure for this new livein. + const auto *RC = MRI->getRegClass(LiveIn); + const int *PSetID = TRI->getRegClassPressureSets(RC); + for (; *PSetID != -1; ++PSetID) + C.PSetDiff[*PSetID] += TRI->getRegClassWeight(RC).RegWeight; + } + } + for (const Register &Def : C.Defs) { + // Decrease register pressure for this def. + const auto *RC = MRI->getRegClass(Def); + const int *PSetID = TRI->getRegClassPressureSets(RC); + for (; *PSetID != -1; ++PSetID) + C.PSetDiff[*PSetID] -= TRI->getRegClassWeight(RC).RegWeight; + } + if (C.Defs.size() > NumNewLiveIns) + FilteredChains.push_back(C); + } + Chains = FilteredChains; +} + +void MachineSinking::CalculateMaxSetPressures( + const MachineBasicBlock *MBB, const DenseSet &LiveIns, + std::vector &MaxSetPressure) { + assert(MaxSetPressure.size() == TRI->getNumRegPressureSets()); + // Create a list of live regs to pre-populate the RPTracker. + SmallVector LiveInRegs; + // TODO: Track lanes of liveins + for (unsigned Reg : LiveIns) + LiveInRegs.emplace_back(Reg, LaneBitmask::getAll()); + RegionPressure Pressure; + RegPressureTracker RPTracker(Pressure); + RPTracker.init(MBB->getParent(), &RegClassInfo, nullptr, MBB, MBB->end(), + /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); + RPTracker.addLiveRegs(LiveInRegs); + for (auto MII = MBB->instr_end(), MIE = MBB->instr_begin(); MII != MIE; + --MII) { + const MachineInstr &MI = *std::prev(MII); + if (MI.isDebugInstr() || MI.isPseudoProbe()) + continue; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, false, false); + RPTracker.recedeSkipDebugValues(); + assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); + RPTracker.recede(RegOpers); + } + RPTracker.closeRegion(); + for (unsigned I = 0; I < TRI->getNumRegPressureSets(); I++) + MaxSetPressure[I] = + std::max(MaxSetPressure[I], RPTracker.getPressure().MaxSetPressure[I]); +} + +/// Estimate max register pressure in the MachineCycle by traversing each +/// MachineBasicBlock with a RegPressureTracker, prepopulating the tracker with +/// pre-calculated LiveIns. +void MachineSinking::CalculateMaxSetPressures( + const MachineCycle *Cycle, const DenseSet &CycleLiveIns, + std::vector &MaxSetPressure) { + + assert(MaxSetPressure.size() == TRI->getNumRegPressureSets()); + for (const MachineBasicBlock *MBB : Cycle->blocks()) + CalculateMaxSetPressures(MBB, CycleLiveIns, MaxSetPressure); + + const DenseSet NoLiveIns; + CalculateMaxSetPressures(Cycle->getCyclePreheader(), NoLiveIns, + MaxSetPressure); +} + PreservedAnalyses MachineSinkingPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { @@ -873,63 +1185,77 @@ bool MachineSinking::run(MachineFunction &MF) { } if (SinkInstsIntoCycle) { - SmallVector Cycles(CI->toplevel_cycles()); SchedModel.init(STI); - bool HasHighPressure; - - DenseMap SunkInstrs; - - enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END }; - for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END; - ++Stage, SunkInstrs.clear()) { - HasHighPressure = false; - - for (auto *Cycle : Cycles) { - MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); - if (!Preheader) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); - continue; + SmallVector Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); + continue; + } + SmallVector Candidates; + FindCycleSinkCandidates(Cycle, Preheader, Candidates); + + // Pre-calculate liveins to the cycle. This is used to calculate the max + // reg pressure in a loop, and determining how many liveins sinking a + // chain would introduce. + DenseSet CycleLiveIns; + calculateCycleLiveIns(Cycle, CycleLiveIns); + + // Initalise MaxPressureSet accross the cycle + std::vector CycleMaxSetPressure; + for (unsigned I = 0; I < TRI->getNumRegPressureSets(); I++) + CycleMaxSetPressure.push_back(0); + CalculateMaxSetPressures(Cycle, CycleLiveIns, CycleMaxSetPressure); + + std::vector Chains; + SortIntoUseDefChains(Candidates, Chains, CycleLiveIns); + FilterChains(Cycle, Chains); + + // Calculate how much over the max set pressure we are for each set. + std::vector OverMaxSetPressure; + for (unsigned I = 0; I < TRI->getNumRegPressureSets(); I++) { + unsigned Limit = TRI->getRegPressureSetLimit(MF, I); + unsigned MaxPressure = CycleMaxSetPressure[I]; + OverMaxSetPressure.push_back(((int)MaxPressure) - ((int)Limit)); + LLVM_DEBUG(dbgs() << "CycleSink: Pressure set " << I << ". Limit: " + << Limit << ". MaxPressure: " << MaxPressure << "\n"); + } + // Prioritise chains that introduce the smallest latency in the cycle. + llvm::sort(Chains, [](const Chain &A, const Chain &B) { + return (A.TotalLatency * A.NumUses) < (B.TotalLatency * B.NumUses); + }); + // Helper function to determine if sinking a chain will reduce a PSet + // that is over the max. + auto ReducesCritialPSet = [&](Chain &C) { + for (unsigned PSetID = 0; PSetID < TRI->getNumRegPressureSets(); + PSetID++) { + if (OverMaxSetPressure[PSetID] > 0 && C.PSetDiff[PSetID] < 0) + return true; } - SmallVector Candidates; - FindCycleSinkCandidates(Cycle, Preheader, Candidates); - - unsigned i = 0; - - // Walk the candidates in reverse order so that we start with the use - // of a def-use chain, if there is any. - // TODO: Sort the candidates using a cost-model. - for (MachineInstr *I : llvm::reverse(Candidates)) { - // CycleSinkStage::COPY: Sink a limited number of copies - if (Stage == CycleSinkStage::COPY) { - if (i++ == SinkIntoCycleLimit) { - LLVM_DEBUG(dbgs() - << "CycleSink: Limit reached of instructions to " - "be analyzed."); - break; - } - - if (!I->isCopy()) - continue; - } - - // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions - // which the target specifies as low-latency - if (Stage == CycleSinkStage::LOW_LATENCY && - !TII->hasLowDefLatency(SchedModel, *I, 0)) - continue; - - if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs)) - continue; + return false; + }; + auto IsCopyOrLowLatency = [&](const Chain &C) { + for (const MachineInstr *MI : C.Instrs) + if (!MI->isCopy() && !TII->hasLowDefLatency(SchedModel, *MI, 0)) + return false; + return true; + }; + // Sink the chains if they reduce a critical pressure set. + for (auto &C : Chains) { + // If the instruction is low latency, sink anyway. + if (!IsCopyOrLowLatency(C) && !ReducesCritialPSet(C)) + continue; + if (NumCycleSunk + C.Instrs.size() >= SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking Chain as will exceed " + "Instruction Sink limit"); + break; + } else if (SinkChainIntoCycle(Cycle, C, OverMaxSetPressure)) { + NumCycleSunk += C.Instrs.size(); EverMadeChange = true; - ++NumCycleSunk; } - - // Recalculate the pressure after sinking - if (!HasHighPressure) - HasHighPressure = registerPressureExceedsLimit(*Preheader); } - if (!HasHighPressure) - break; } } @@ -1242,21 +1568,6 @@ bool MachineSinking::registerPressureSetExceedsLimit( return false; } -// Recalculate RP and check if any pressure set exceeds the set limit. -bool MachineSinking::registerPressureExceedsLimit( - const MachineBasicBlock &MBB) { - std::vector BBRegisterPressure = getBBRegisterPressure(MBB, false); - - for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) { - if (BBRegisterPressure[PS] >= - TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) { - return true; - } - } - - return false; -} - /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -1735,97 +2046,203 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Aggressively sink instructions into cycles. This will aggressively try to -/// sink all instructions in the top-most preheaders in an attempt to reduce RP. -/// In particular, it will sink into multiple successor blocks without limits -/// based on the amount of sinking, or the type of ops being sunk (so long as -/// they are safe to sink). -bool MachineSinking::aggressivelySinkIntoCycle( - MachineCycle *Cycle, MachineInstr &I, - DenseMap &SunkInstrs) { - // TODO: support instructions with multiple defs - if (I.getNumDefs() > 1) - return false; - - LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I); - assert(Cycle->getCyclePreheader() && "Cycle sink needs a preheader block"); - SmallVector> Uses; - - MachineOperand &DefMO = I.getOperand(0); - for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) { - Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI}); - } - - for (std::pair Entry : Uses) { - MachineInstr *MI = Entry.second; - LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI); - if (MI->isPHI()) { - LLVM_DEBUG( - dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n"); - continue; - } - // We cannot sink before the prologue - if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) { - LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, " - "can't sink.\n"); - continue; +MachineInstr *MachineSinking::getEarliestSinkPoint(MachineInstr *MI1, + MachineInstr *MI2) { + MachineBasicBlock *MBB1 = MI1->getParent(); + MachineBasicBlock *MBB2 = MI2->getParent(); + if (MBB1 != MBB2) { + // If there are two uses in different blocks, set sink point to common + // dominator. + MachineBasicBlock *SinkBlock = DT->findNearestCommonDominator(MBB1, MBB2); + if (!SinkBlock) { + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); + return nullptr; } - if (!Cycle->contains(MI->getParent())) { - LLVM_DEBUG( - dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n"); - continue; + if (SinkBlock == MBB1) + return MI1; + else if (SinkBlock == MBB2) + return MI2; + else + return &*SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()); + } else { + // If two uses have the same basic block, set sink point to whichever + // instruction appears first. + for (const MachineInstr &MI : *MBB1) { + if (&MI == MI1) { + return MI1; + } else if (&MI == MI2) { + return MI2; + } } + } + return nullptr; +} - MachineBasicBlock *SinkBlock = MI->getParent(); - MachineInstr *NewMI = nullptr; - SinkItem MapEntry(&I, SinkBlock); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. +bool MachineSinking::SinkChainIntoCycle(MachineCycle *Cycle, Chain &C, + std::vector &OverMaxSetPressure) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for Chain:\n"; + for (auto *MI : C.Instrs) dbgs() << *MI << "\n";); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); + DenseMap SinkLocations; + SmallPtrSet VisitedMI; + + // First, check all instructions in the Chain can be sunk, recording their + // sink location. Go in reverse order to account for uses within the chain. + for (MachineInstr *I : llvm::reverse(C.Instrs)) { + VisitedMI.insert(I); + MachineInstr *SinkToMI = nullptr; + MachineOperand &DefMO = I->getOperand(0); + for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) { + if (VisitedMI.contains(&MI)) { + // Use is in the chain. Find the earliest sink point between our + // current sink point, if we have one, and the use's sink point. + if (SinkToMI) { + SinkToMI = getEarliestSinkPoint(SinkToMI, SinkLocations[&MI]); + if (!SinkToMI) { + LLVM_DEBUG(dbgs() + << "CycleSink: Could not get earliest sink point of " + << SinkToMI << " and " << SinkLocations[&MI]); + return false; + } + } else { + SinkToMI = SinkLocations[&MI]; + } + continue; + } + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); - auto SI = SunkInstrs.find(MapEntry); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); + return false; + } - // Check for the case in which we have already sunk a copy of this - // instruction into the user block. - if (SI != SunkInstrs.end()) { - LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: " - << printMBBReference(*SinkBlock) << "\n"); - NewMI = SI->second; + if (MI.isPHI()) + return false; + + if (MI.getParent() == Cycle->getCyclePreheader()) { + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); + return false; + } + + if (!SinkToMI) { + SinkToMI = &MI; + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink MI to: " << MI << "\n"); + continue; + } + SinkToMI = getEarliestSinkPoint(&MI, SinkToMI); + if (!SinkToMI) + return false; } + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink location for " << *I + << "to : " << *SinkToMI << "\n"); + SinkLocations[I] = SinkToMI; + } - // Create a copy of the instruction in the use block. - if (!NewMI) { - LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: " - << printMBBReference(*SinkBlock) << "\n"); - - NewMI = I.getMF()->CloneMachineInstr(&I); - if (DefMO.getReg().isVirtual()) { - const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg()); - Register DestReg = MRI->createVirtualRegister(TRC); - NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(), - *TRI); + // Sink/clone all the instructions. Going in reverse because uses within the + // chain need to be cloned before defs to easily update uses. + // However, this does make it difficult when multiple instructions have the + // same sink point, as they will be sunk in reverse order, reversing the + // order of the instructions. Account for this by keeping track of how many + // times an instruction has been sunk to a location. + SmallPtrSet ToDelete; + SmallPtrSet ToClone; + SmallDenseMap NumSunkToSinkPoint; + SmallDenseMap EarliestClonePointInBlock; + for (MachineInstr *I : llvm::reverse(C.Instrs)) { + auto SinkLocation = SinkLocations[I]; + + bool CloneUses = true; + SmallDenseMap FirstUseInBlock; + // Set of users will act as a worklist when it comes to cloning. + SmallPtrSet UserMIs; + MachineOperand &DefMO = I->getOperand(0); + // If the number of uses from the def exceeds a threshold, determined + // by opt param CloneUsesThreshold, don't clone as the spiller is much + // more likely to prioritise other values to spill. + if (C.NumUses > CloneUsesThreshold) { + LLVM_DEBUG(dbgs() << "CycleSink: Chain with too many uses (" << C.NumUses + << "). Not considering cloning.\n"); + CloneUses = false; + continue; + } + // Add all users to a worklist, recording the first user in each block as + // to limit cloning to once per block to avoid excessive cloning. + for (MachineInstr &UserMI : MRI->use_instructions(DefMO.getReg())) { + MachineBasicBlock *UserBlock = UserMI.getParent(); + UserMIs.insert(&UserMI); + auto It = FirstUseInBlock.find(UserBlock); + MachineInstr *SinkPoint = It == FirstUseInBlock.end() + ? &UserMI + : getEarliestSinkPoint(It->second, &UserMI); + FirstUseInBlock[UserBlock] = SinkPoint; + } + // Sink if there's only one clone point + if (!CloneUses || FirstUseInBlock.size() == 1) { + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction " << *I << "\n"); + // Adjust the sink location due to iterating in reverse. + auto It = NumSunkToSinkPoint.find(SinkLocation); + if (It != NumSunkToSinkPoint.end()) { + auto BlockIt = MachineBasicBlock::iterator(SinkLocation); + unsigned Step = It->second; + while (Step--) + --BlockIt; + NumSunkToSinkPoint[SinkLocation] = It->second + 1; + SinkLocation = &*BlockIt; + } else { + NumSunkToSinkPoint[SinkLocation] = 1; + } + SinkLocation->getParent()->splice(SinkLocation, Preheader, I); + } else { + LLVM_DEBUG(dbgs() << "CycleSink: Cloning instruction " << *I << "\n"); + for (MachineInstr *UserMI : UserMIs) { + MachineBasicBlock *UserMBB = UserMI->getParent(); + // Only clone to the earliest user in the block. + if (FirstUseInBlock[UserMBB] != UserMI || ToDelete.contains(UserMI)) + continue; + // Actually do the cloning, keeping track of the new registers. + MachineInstr *Clone = I->getMF()->CloneMachineInstr(I); + Register DefReg = I->getOperand(0).getReg(); + Register NewVReg = MRI->createVirtualRegister(MRI->getRegClass(DefReg)); + Clone->getOperand(0).setReg(NewVReg); + UserMBB->insert(UserMI, Clone); + ToDelete.insert(I); + // Update registers for all users within this block. + for (MachineInstr *UserMI2 : UserMIs) + if (UserMI2->getParent() == UserMBB) + for (MachineOperand &MO : UserMI2->operands()) + if (MO.isReg() && MO.getReg() == DefReg) + MO.setReg(NewVReg); + // Adjust future instructions that want to sink to this location. + // Account for the fact we may be cloning to an already cloned or + // sunk MI, in which case find the original sink point and increment. + auto UserSinkLoc = SinkLocations.find(UserMI); + MachineInstr *OriginalSinkLoc = + UserSinkLoc != SinkLocations.end() ? UserSinkLoc->second : UserMI; + auto It = NumSunkToSinkPoint.find(OriginalSinkLoc); + NumSunkToSinkPoint[OriginalSinkLoc] = + It == NumSunkToSinkPoint.end() ? 1 : It->second + 1; + SinkLocations[Clone] = OriginalSinkLoc; } - SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), - NewMI); - SunkInstrs.insert({MapEntry, NewMI}); } // Conservatively clear any kill flags on uses of sunk instruction - for (MachineOperand &MO : NewMI->all_uses()) { - assert(MO.isReg() && MO.isUse()); - RegsToClearKillFlags.insert(MO.getReg()); + for (MachineOperand &MO : I->operands()) { + if (MO.isReg() && MO.readsReg()) + RegsToClearKillFlags.insert(MO.getReg()); } + } - // The instruction is moved from its basic block, so do not retain the - // debug information. - assert(!NewMI->isDebugInstr() && "Should not sink debug inst"); - NewMI->setDebugLoc(DebugLoc()); + // Adjust max set pressure + for (unsigned PSetID = 0; PSetID < TRI->getNumRegPressureSets(); PSetID++) + OverMaxSetPressure[PSetID] += C.PSetDiff[PSetID]; - // Replace the use with the newly created virtual register. - RegSubRegPair &UseReg = Entry.first; - MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(), - UseReg.SubReg, *TRI); - } - // If we have replaced all uses, then delete the dead instruction - if (I.isDead(*MRI)) - I.eraseFromParent(); + for (MachineInstr *MI : ToDelete) + MI->eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir index f718749bff3d3..c8932d8c258e5 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --sink-insts-to-avoid-spills=1 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX9-SUNK %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 --sink-insts-to-avoid-spills=1 -mattr=+wavefrontsize64 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX10-SUNK %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 --sink-insts-to-avoid-spills=1 -machine-sink-cycle-limit=10000 -mattr=+wavefrontsize64 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX10-SUNK %s --- name: test_sink_copy @@ -29,24 +29,24 @@ body: | ; GFX9-SUNK-NEXT: bb.2: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.3: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} @@ -81,24 +81,24 @@ body: | ; GFX10-SUNK-NEXT: bb.2: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.3: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} @@ -183,12 +183,12 @@ body: | ; GFX9-SUNK-NEXT: bb.2: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 @@ -196,12 +196,12 @@ body: | ; GFX9-SUNK-NEXT: bb.3: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] ; GFX9-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 @@ -237,36 +237,36 @@ body: | ; GFX10-SUNK-NEXT: bb.2: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.3: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]] ; GFX10-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} @@ -412,57 +412,57 @@ body: | ; GFX10-SUNK-NEXT: bb.2: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_1]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.3: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_22]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.4: @@ -543,17 +543,6 @@ body: | ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF - ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 - ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: bb.1: - ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 - ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: bb.2: - ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) - ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec @@ -567,56 +556,43 @@ body: | ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.3: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.4: @@ -650,69 +626,69 @@ body: | ; GFX10-SUNK-NEXT: bb.2: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_1]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.3: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_53]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.4: @@ -801,6 +777,29 @@ body: | ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF @@ -818,69 +817,23 @@ body: | ; GFX9-SUNK-NEXT: bb.2: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_24]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.3: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.4: @@ -920,69 +873,69 @@ body: | ; GFX10-SUNK-NEXT: bb.2: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_1]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.3: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_53]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.4: @@ -1078,19 +1031,6 @@ body: | ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF - ; GFX9-SUNK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec - ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 - ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: bb.1: - ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]] - ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec - ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 - ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: bb.2: - ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) - ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec @@ -1104,56 +1044,46 @@ body: | ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]] + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_24]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.3: ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX9-SUNK-NEXT: {{ $}} - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]] ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 ; GFX9-SUNK-NEXT: {{ $}} ; GFX9-SUNK-NEXT: bb.4: @@ -1189,69 +1119,69 @@ body: | ; GFX10-SUNK-NEXT: bb.2: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_1]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.3: ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) ; GFX10-SUNK-NEXT: {{ $}} - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec - ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] - ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_53]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_28]] ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 ; GFX10-SUNK-NEXT: {{ $}} ; GFX10-SUNK-NEXT: bb.4: @@ -1324,3 +1254,575 @@ body: | S_ENDPGM 0 ... +# Sinking will increase pressure in loop +--- +name: test_sink_no_increase +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_no_increase + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF9]], 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]] + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.2 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_no_increase + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF9]], 0, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]] + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.2 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vreg_256 = IMPLICIT_DEF + %6:vreg_256 = IMPLICIT_DEF + %7:vreg_256 = IMPLICIT_DEF + %8:vreg_256 = IMPLICIT_DEF + %9:vgpr_32 = IMPLICIT_DEF + %10:vgpr_32 = IMPLICIT_DEF + %11:vgpr_32 = V_ADD_U32_e64 %9, %10, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %6, implicit %7, implicit %8 + INLINEASM &"", 1, implicit %11 + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + +# Sinking %11 and %12 will not decrease rp +--- +name: test_sink_no_rp_decrease +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_no_rp_decrease + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF9]], 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U32_e64_]], [[DEF9]], 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_MUL_U32_U24_e64_]] + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.2 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_no_rp_decrease + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF9]], 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U32_e64_]], [[DEF9]], 0, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_MUL_U32_U24_e64_]] + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.2 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vreg_256 = IMPLICIT_DEF + %6:vreg_256 = IMPLICIT_DEF + %7:vreg_256 = IMPLICIT_DEF + %8:vreg_256 = IMPLICIT_DEF + %9:vgpr_32 = IMPLICIT_DEF + %10:vgpr_32 = IMPLICIT_DEF + %11:vgpr_32 = V_ADD_U32_e64 %9, %10, 0, implicit $exec + %12:vgpr_32 = V_MUL_U32_U24_e64 %11, %10, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %6, implicit %7, implicit %8 + INLINEASM &"", 1, implicit %11, implicit %12 + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + + +# Drecreases rp only if %11 and %12 are sunk together +--- +name: test_sink_rp_decrease +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_rp_decrease + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]] + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF8]], 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U32_e64_]], [[DEF8]], 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_MUL_U32_U24_e64_]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF8]], 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_1]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_rp_decrease + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]] + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF8]], 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U32_e64_]], [[DEF8]], 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_MUL_U32_U24_e64_]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF8]], [[DEF8]], 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_1]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vreg_256 = IMPLICIT_DEF + %6:vreg_256 = IMPLICIT_DEF + %7:vreg_256 = IMPLICIT_DEF + %8:vreg_256 = IMPLICIT_DEF + %9:vgpr_32 = IMPLICIT_DEF + %10:vgpr_32 = IMPLICIT_DEF + %11:vgpr_32 = V_ADD_U32_e64 %10, %10, 0, implicit $exec + %12:vgpr_32 = V_MUL_U32_U24_e64 %11, %10, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %6, implicit %7, implicit %8 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %11, implicit %12 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %11 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + +# Prioritise sinking %13 as it's used fewer times +--- +name: test_sink_fewest_uses +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_fewest_uses + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF9]], [[DEF9]], 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]] + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF10]], [[DEF10]], 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_fewest_uses + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF6:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]] + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF10]], [[DEF10]], 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF9]], [[DEF9]], 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF9]], [[DEF9]], 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_2]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vreg_256 = IMPLICIT_DEF + %6:vreg_256 = IMPLICIT_DEF + %7:vreg_256 = IMPLICIT_DEF + %8:vreg_128 = IMPLICIT_DEF + %9:vgpr_32 = IMPLICIT_DEF + %10:vgpr_32 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vgpr_32 = V_ADD_U32_e64 %10, %10, 0, implicit $exec + %13:vgpr_32 = V_ADD_U32_e64 %11, %11, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %12, implicit %13 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %12 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + +--- +name: test_high_rp_inside +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_high_rp_inside + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, def %8 + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, def %9 + ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY %8 + ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY %9 + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF4]], [[DEF5]], 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U32_e64_]], [[DEF5]], 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_MUL_U32_U24_e64_]], implicit %8, implicit %9, implicit [[COPY]], implicit [[COPY1]], implicit [[DEF4]], implicit [[DEF5]] + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.2 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3.entry: + ; GFX9-SUNK-NEXT: successors: + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4.entry: + ; + ; GFX10-SUNK-LABEL: name: test_high_rp_inside + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, def %8 + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, def %9 + ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY %8 + ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY %9 + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF4]], [[DEF5]], 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[V_ADD_U32_e64_]], [[DEF5]], 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_MUL_U32_U24_e64_]], implicit %8, implicit %9, implicit [[COPY]], implicit [[COPY1]], implicit [[DEF4]], implicit [[DEF5]] + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.2 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3.entry: + ; GFX10-SUNK-NEXT: successors: + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + %6:vgpr_32 = V_ADD_U32_e64 %4, %5, 0, implicit $exec + %7:vgpr_32 = V_MUL_U32_U24_e64 %6, %5, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1 /* sideeffect attdialect */, implicit %0, implicit %1, implicit %2, implicit %3 + INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, def %8:vreg_256 + INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, def %9:vreg_256 + %10:vreg_256 = COPY %8 + %11:vreg_256 = COPY %9 + INLINEASM &"", 1 /* sideeffect attdialect */, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %4, implicit %5 + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 + + bb.3.entry: +... diff --git a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir index f23afe52f97de..e3986831d996b 100644 --- a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir +++ b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir @@ -3,19 +3,6 @@ # Test kill flags are cleared after sinking operations into cycle during MachineLICM. ---- | - target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64" - target triple = "s390x-unknown-linux-gnu" - - %0 = type <{ i32, [3 x i8] }> - - @b = external dso_local global [1 x %0], align 2 - - define dso_local void @c() local_unnamed_addr { - ret void - } - -... --- name: c tracksRegLiveness: true @@ -24,54 +11,26 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[LARL:%[0-9]+]]:addr64bit = LARL @b - ; CHECK-NEXT: [[LA:%[0-9]+]]:gr64bit = LA killed [[LARL]], 49, $noreg - ; CHECK-NEXT: [[LGHI:%[0-9]+]]:gr64bit = LGHI 7 ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64bit = COPY [[LA]] - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0 - ; CHECK-NEXT: $r2d = COPY [[DEF]] - ; CHECK-NEXT: $r3d = COPY [[COPY]] - ; CHECK-NEXT: $r4d = COPY [[LGHI]] - ; CHECK-NEXT: CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0 - ; CHECK-NEXT: $r2d = COPY [[DEF1]] - ; CHECK-NEXT: $r3d = COPY [[COPY]] - ; CHECK-NEXT: $r4d = COPY [[LGHI]] - ; CHECK-NEXT: CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64bit = COPY [[DEF]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64bit = COPY [[COPY]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY1]], implicit [[DEF]] ; CHECK-NEXT: J %bb.1 bb.0: successors: %bb.1(0x80000000) - %1:addr64bit = LARL @b - %0:gr64bit = LA killed %1, 49, $noreg - %2:gr64bit = LGHI 7 - %3:gr64bit = IMPLICIT_DEF - %5:gr64bit = IMPLICIT_DEF - %6:gr64bit = COPY killed %0 + %7:gr64bit = IMPLICIT_DEF + %8:gr64bit = COPY %7 + %9:gr64bit = COPY killed %8 bb.1: successors: %bb.1(0x80000000) - ADJCALLSTACKDOWN 0, 0 - $r2d = COPY %3 - $r3d = COPY %6 - $r4d = COPY %2 - CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d - ADJCALLSTACKUP 0, 0 - ADJCALLSTACKDOWN 0, 0 - $r2d = COPY %5 - $r3d = COPY %6 - $r4d = COPY %2 - CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d - ADJCALLSTACKUP 0, 0 + INLINEASM &"", 1 , implicit %9, implicit %7 J %bb.1 ...