diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -66,6 +66,44 @@ } } +template +auto GenericCycle::getCyclePreheader() const -> BlockT * { + BlockT *Predecessor = getCyclePredecessor(); + if (!Predecessor) + return nullptr; + + assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!"); + + if (succ_size(Predecessor) != 1) + return nullptr; + + // Make sure we are allowed to hoist instructions into the predecessor. + if (!Predecessor->isLegalToHoistInto()) + return nullptr; + + return Predecessor; +} + +template +auto GenericCycle::getCyclePredecessor() const -> BlockT * { + if (!isReducible()) + return nullptr; + + BlockT *Out = nullptr; + + // Loop over the predecessors of the header node... + BlockT *Header = getHeader(); + for (const auto Pred : predecessors(Header)) { + if (!contains(Pred)) { + if (Out && Out != Pred) + return nullptr; + Out = Pred; + } + } + + return Out; +} + /// \brief Helper class for computing cycle information. template class GenericCycleInfoCompute { using BlockT = typename ContextT::BlockT; @@ -326,6 +364,18 @@ return nullptr; } +/// \brief get the depth for the cycle which containing a given block. +/// +/// \returns the depth for the innermost cycle containing \p Block or 0 if it is +/// not contained in any cycle. +template +unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const { + CycleT *Cycle = getCycle(Block); + if (!Cycle) + return 0; + return Cycle->getDepth(); +} + /// \brief Validate the internal consistency of the cycle tree. 
/// /// Note that this does \em not check that cycles are really cycles in the CFG, diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -100,6 +100,10 @@ BlockT *getHeader() const { return Entries[0]; } + const SmallVectorImpl & getEntries() const { + return Entries; + } + /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); } @@ -124,6 +128,16 @@ /// branched to. void getExitBlocks(SmallVectorImpl &TmpStorage) const; + /// Return the preheader block for this cycle. Pre-header is well-defined for + /// reducible cycle in docs/LoopTerminology.rst as: the only one entering + /// block and its only edge is to the entry block. Return null for irreducible + /// cycles. + BlockT *getCyclePreheader() const; + + /// If the cycle has exactly one entry with exactly one predecessor, return + /// it, otherwise return nullptr. + BlockT *getCyclePredecessor() const; + /// Iteration over child cycles. //@{ using const_child_iterator_base = @@ -239,6 +253,7 @@ const ContextT &getSSAContext() const { return Context; } CycleT *getCycle(const BlockT *Block) const; + unsigned getCycleDepth(const BlockT *Block) const; CycleT *getTopLevelParentCycle(const BlockT *Block) const; /// Move \p Child to \p NewParent by manipulating Children vectors. 
diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h --- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h @@ -16,6 +16,8 @@ #include "llvm/ADT/GenericCycleInfo.h" #include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" namespace llvm { @@ -25,6 +27,41 @@ using MachineCycleInfo = GenericCycleInfo; using MachineCycle = MachineCycleInfo::CycleT; +/// Legacy analysis pass which computes a \ref MachineCycleInfo. +class MachineCycleInfoWrapperPass : public MachineFunctionPass { + MachineFunction *F = nullptr; + MachineCycleInfo CI; + +public: + static char ID; + + MachineCycleInfoWrapperPass(); + + MachineCycleInfo &getCycleInfo() { return CI; } + const MachineCycleInfo &getCycleInfo() const { return CI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + + // TODO: verify analysis +}; + +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +// TODO: add this function to GenericCycle template after implementing IR +// version. 
+bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I); + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h --- a/llvm/include/llvm/CodeGen/MachineSSAContext.h +++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -28,6 +28,8 @@ inline auto successors(MachineBasicBlock *BB) { return BB->successors(); } inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); } +inline unsigned succ_size(MachineBasicBlock *BB) { return BB->succ_size(); } +inline unsigned pred_size(MachineBasicBlock *BB) { return BB->pred_size(); } template <> class GenericSSAContext { const MachineRegisterInfo *RegInfo = nullptr; diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp --- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,50 +8,15 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineSSAContext.h" -#include "llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; template class llvm::GenericCycleInfo; template class llvm::GenericCycle; -namespace { - -/// Legacy analysis pass which computes a \ref MachineCycleInfo. 
-class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -} // namespace - char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -111,3 +76,62 @@ CI.print(errs()); return false; } + +bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetInstrInfo *TII = ST.getInstrInfo(); + + // The instruction is cycle invariant if all of its operands are. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. 
+ // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && + !TII->isIgnorableUse(MO)) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (any_of(Cycle->getEntries(), + [&](const MachineBasicBlock *Block) { + return Block->isLiveIn(Reg); + })) { + // If the reg is live into any header of the cycle we can't hoist an + // instruction which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); + + // If the cycle contains the definition of an operand, then the instruction + // isn't cycle invariant. + if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is cycle invariant! 
+ return true; +} diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,18 +96,18 @@ cl::init(20), cl::Hidden); static cl::opt -SinkInstsIntoLoop("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into loops to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt SinkIntoLoopLimit( - "machine-sink-loop-limit", - cl::desc("The maximum number of instructions considered for loop sinking."), + SinkInstsIntoCycle("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkIntoCycleLimit( + "machine-sink-cycle-limit", + cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); +STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -119,7 +120,7 @@ MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineLoopInfo *LI; + MachineCycleInfo *CI; MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -180,8 +181,9 @@ AU.addRequired(); AU.addRequired(); 
AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); @@ -232,9 +234,9 @@ MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, - SmallVectorImpl &Candidates); - bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, + SmallVectorImpl &Candidates); + bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -261,7 +263,7 @@ "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -378,26 +380,27 @@ return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindCycleSinkCandidates( + MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + if (!isCycleInvariant(Cycle, MI)) { + LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, 
DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) @@ -409,7 +412,7 @@ if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -425,22 +428,12 @@ MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - LI = &getAnalysis(); + CI = &getAnalysis().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); - // MachineSink currently uses MachineLoopInfo, which only recognizes natural - // loops. As such, we could sink instructions into irreducible cycles, which - // would be non-profitable. - // WARNING: The current implementation of hasStoreBetween() is incorrect for - // sinking into irreducible cycles (PR53990), this bailout is currently - // necessary for correctness, not just profitability. 
- ReversePostOrderTraversal RPOT(&*MF.begin()); - if (containsIrreducibleCFG(RPOT, *LI)) - return false; - bool EverMadeChange = false; while (true) { @@ -473,32 +466,33 @@ EverMadeChange = true; } - if (SinkInstsIntoLoop) { - SmallVector Loops(LI->begin(), LI->end()); - for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (SinkInstsIntoCycle) { + SmallVector Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); if (!Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); continue; } SmallVector Candidates; - FindLoopSinkCandidates(L, Preheader, Candidates); + FindCycleSinkCandidates(Cycle, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoLoopLimit) { - LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoLoop(L, *I)) + if (!SinkIntoCycle(Cycle, *I)) break; EverMadeChange = true; - ++NumLoopSunk; + ++NumCycleSunk; } } } @@ -520,12 +514,12 @@ // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable loop there may be nowhere to stop. + // unreachable cycle there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and loop depth. + // Cache all successors, sorted by frequency info and cycle depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. 
@@ -644,13 +638,16 @@ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB loop. + // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; - // Check for backedges of more "complex" loops. - if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + MachineCycle *FromCycle = CI->getCycle(FromBB); + MachineCycle *ToCycle = CI->getCycle(ToBB); + + // Check for backedges of more "complex" cycles. + if (FromCycle == ToCycle && FromCycle && + (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -753,9 +750,9 @@ if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper loop to a shallower - // loop, even if the latter post-dominates the former (PR21115). - if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + // It is profitable to sink an instruction from a deeper cycle to a shallower + // cycle, even if the latter post-dominates the former (PR21115). + if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. @@ -776,11 +773,11 @@ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineLoop *ML = LI->getLoopFor(MBB); + MachineCycle *MCycle = CI->getCycle(MBB); - // If the instruction is not inside a loop, it is not profitable to sink MI to + // If the instruction is not inside a cycle, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. 
- if (!ML) + if (!MCycle) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { @@ -798,7 +795,7 @@ return false; }; - // If this instruction is inside a loop and sinking this instruction can make + // If this instruction is inside a cycle and sinking this instruction can make // more registers live range shorten, it is still prifitable. for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. @@ -826,14 +823,15 @@ return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); - // DefMI is defined outside of loop. There should be no live range - // impact for this operand. Defination outside of loop means: - // 1: defination is outside of loop. - // 2: defination is in this loop, but it is a PHI in the loop header. - if (LI->getLoopFor(DefMI->getParent()) != ML || - (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + MachineCycle *Cycle = CI->getCycle(DefMI->getParent()); + // DefMI is defined outside of cycle. There should be no live range + // impact for this operand. Definition outside of cycle means: + // 1: definition is outside of cycle. + // 2: definition is in this cycle, but it is a PHI in the cycle header. + if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() && + Cycle->getHeader() == DefMI->getParent())) continue; - // The DefMI is defined inside the loop. + // The DefMI is defined inside the cycle. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { @@ -843,8 +841,8 @@ } } - // If MI is in loop and all its operands are alive across the whole loop or if - // no operand sinking make register pressure set exceed limit, it is + // If MI is in cycle and all its operands are alive across the whole cycle or + // if no operand sinking make register pressure set exceed limit, it is // profitable to sink MI. 
return true; } @@ -876,14 +874,14 @@ AllSuccs.push_back(DTChild->getBlock()); } - // Sort Successors according to their loop depth or block frequency info. + // Sort Successors according to their cycle depth or block frequency info. llvm::stable_sort( AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq - : LI->getLoopDepth(L) < LI->getLoopDepth(R); + : CI->getCycleDepth(L) < CI->getCycleDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -898,7 +896,7 @@ AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); - // Loop over all the operands of the specified instruction. If there is + // loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. // SuccToSinkTo - This is the successor to sink this instruction to, once we @@ -945,7 +943,7 @@ // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies - // higher priority, otherwise prioritize smaller loop depths. + // higher priority, otherwise prioritize smaller cycle depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -968,7 +966,7 @@ } // It is not possible to sink an instruction into its own block. This can - // happen with loops. + // happen with cycles. if (MBB == SuccToSinkTo) return nullptr; @@ -1222,68 +1220,70 @@ return HasAliasedStore; } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. 
-bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); - assert(Preheader && "Loop sink needs a preheader block"); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. +bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop + // the instruction (and thus possibly executing it on every cycle // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. 
if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, I); @@ -1407,9 +1407,11 @@ TryBreak = true; } - // Don't sink instructions into a loop. 
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + // Don't sink instructions into a cycle. + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); TryBreak = true; } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -132,6 +132,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir --- a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir +++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir @@ -1,10 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK1 # # RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \ -# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ +# RUN: -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \ # RUN: FileCheck %s --check-prefix=SINK2 --- | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -296,6 +296,7 @@ ; 
GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Common Subexpression Elimination ; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Machine Cycle Info Analysis ; GCN-O1-NEXT: Machine code sinking ; GCN-O1-NEXT: Peephole Optimizations ; GCN-O1-NEXT: Remove dead machine instructions @@ -574,6 +575,7 @@ ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction +; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Machine code sinking ; GCN-O1-OPTS-NEXT: Peephole Optimizations ; GCN-O1-OPTS-NEXT: Remove dead machine instructions @@ -861,6 +863,7 @@ ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Common Subexpression Elimination ; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Machine Cycle Info Analysis ; GCN-O2-NEXT: Machine code sinking ; GCN-O2-NEXT: Peephole Optimizations ; GCN-O2-NEXT: Remove dead machine instructions @@ -1161,6 +1164,7 @@ ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Common Subexpression Elimination ; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Machine Cycle Info Analysis ; GCN-O3-NEXT: Machine code sinking ; GCN-O3-NEXT: Peephole Optimizations ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -91,6 +91,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll 
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -111,6 +111,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -90,6 +90,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -107,6 +107,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,14 +32,13 @@ ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: 
# %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_20 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -56,7 +55,7 @@ ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_9 +; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -65,10 +64,12 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -77,82 +78,82 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; CHECK-NEXT: jmp .LBB0_6 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_9: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_10 -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movl %edi, %esi -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: movb %cl, %dl -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.7: # %af +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_11: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_8 
-; CHECK-NEXT: .LBB0_16: # %if.end39 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: .LBB0_17: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_18 -; CHECK-NEXT: # %bb.17: # %if.then41 +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.18: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_18: # %for.end46 +; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_20 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_19: # %for.cond47 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %dl, %dh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: # %bb.20: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: # %bb.21: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: .LBB0_10: # %ae +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: # %bb.12: # %if.end26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # 
in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_15 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_15 -; CHECK-NEXT: # %bb.14: # %if.then31 +; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: # %bb.15: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_15 +; CHECK-NEXT: jmp .LBB0_16 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl +; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll --- a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -92,37 +92,37 @@ ; CHECK-LABEL: switch_trunc_phi_const: ; CHECK: # %bb.0: # %bb0 ; CHECK-NEXT: movzbl %dil, %r8d -; CHECK-NEXT: movl $3895, %ecx # imm = 0xF37 -; CHECK-NEXT: movl $42, %esi -; CHECK-NEXT: movl $13, %edx -; CHECK-NEXT: movl $5, %edi -; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: decl %r8d ; CHECK-NEXT: cmpl $54, %r8d ; CHECK-NEXT: ja .LBB1_8 ; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: movl $3895, %edx # imm = 0xF37 
+; CHECK-NEXT: movl $42, %edi +; CHECK-NEXT: movl $13, %esi +; CHECK-NEXT: movl $5, %ecx +; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: jmpq *.LJTI1_0(,%r8,8) ; CHECK-NEXT: .LBB1_8: # %default ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_2: # %case_1_loop ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rcx ; CHECK-NEXT: movq $1, (%rcx) -; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: .LBB1_3: # %case_5 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq $5, (%rax) -; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: movq %rcx, %rsi ; CHECK-NEXT: .LBB1_4: # %case_13 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq $13, (%rax) -; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: .LBB1_5: # %case_42 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq %rsi, (%rax) -; CHECK-NEXT: movl $55, %ecx +; CHECK-NEXT: movq %rdi, (%rax) +; CHECK-NEXT: movl $55, %edx ; CHECK-NEXT: .LBB1_6: # %case_55 ; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: movq %rdx, (%rax) ; CHECK-NEXT: .LBB1_7: # %case_7 ; CHECK-NEXT: movq g64@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,8 +1377,6 @@ ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1388,20 +1386,24 @@ ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_4 -; ENABLE-NEXT: ## %bb.3: ## 
%for.body4.i +; ENABLE-NEXT: je LBB16_3 +; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something +; ENABLE-NEXT: jmp LBB16_5 +; ENABLE-NEXT: LBB16_3: +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: LBB16_5: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_4 -; ENABLE-NEXT: ## %bb.5: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_5 +; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1418,8 +1420,6 @@ ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,20 +1429,24 @@ ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_4 -; DISABLE-NEXT: ## %bb.3: ## %for.body4.i +; DISABLE-NEXT: je LBB16_3 +; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something +; DISABLE-NEXT: jmp LBB16_5 +; DISABLE-NEXT: LBB16_3: +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_4 -; 
DISABLE-NEXT: ## %bb.5: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_5 +; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx