diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -66,6 +66,51 @@
   }
 }
 
+/// Return the unique out-of-cycle predecessor of the header when it can act
+/// as a preheader (exactly one successor and legal to hoist into), else null.
+template
+auto GenericCycle::getCyclePreheader() const -> BlockT * {
+  if (!isReducible())
+    return nullptr;
+
+  BlockT *Predecessor = getCyclePredecessor();
+  if (!Predecessor)
+    return nullptr;
+
+  if (succ_size(Predecessor) != 1)
+    return nullptr;
+
+  // Make sure we are allowed to hoist instructions into the predecessor.
+  if (!Predecessor->isLegalToHoistInto())
+    return nullptr;
+
+  return Predecessor;
+}
+
+/// Return the single block outside the cycle that branches to the header.
+/// Returns null for irreducible cycles, or when the header has no outside
+/// predecessor or more than one distinct outside predecessor.
+template
+auto GenericCycle::getCyclePredecessor() const -> BlockT * {
+  if (!isReducible())
+    return nullptr;
+
+  BlockT *Out = nullptr;
+
+  // Scan the header's predecessors, remembering the one outside the cycle;
+  // bail out as soon as a second, different outside predecessor shows up.
+  BlockT *Header = getHeader();
+  for (auto *Pred : predecessors(Header)) {
+    if (contains(Pred))
+      continue;
+    if (Out && Out != Pred)
+      return nullptr;
+    Out = Pred;
+  }
+
+  return Out;
+}
+
 /// \brief Helper class for computing cycle information.
 template class GenericCycleInfoCompute {
   using BlockT = typename ContextT::BlockT;
@@ -326,6 +371,18 @@
   return nullptr;
 }
 
+/// \brief Get the depth of the innermost cycle containing a given block.
+///
+/// \returns the depth of the innermost cycle containing \p Block, or 0 if it
+/// is not contained in any cycle.
+template
+unsigned GenericCycleInfo::getCycleDepth(const BlockT *Block) const {
+  CycleT *Cycle = getCycle(Block);
+  if (!Cycle)
+    return 0;
+  return Cycle->getDepth();
+}
+
 /// \brief Validate the internal consistency of the cycle tree.
 ///
 /// Note that this does \em not check that cycles are really cycles in the CFG,
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -100,6 +100,12 @@
 
   BlockT *getHeader() const { return Entries[0]; }
 
+  /// \brief Return every entry block of the cycle; the header reported by
+  /// getHeader() is the first element.
+  auto getHeaders() const -> const SmallVector & {
+    return Entries;
+  }
+
   /// \brief Return whether \p Block is an entry block of the cycle.
   bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); }
 
@@ -124,6 +130,15 @@
   /// branched to.
   void getExitBlocks(SmallVectorImpl &TmpStorage) const;
 
+  /// Return the preheader block for this cycle. This only works for reducible
+  /// cycles for now.
+  BlockT *getCyclePreheader() const;
+
+  /// If the cycle has exactly one unique predecessor outside of the cycle,
+  /// return it, otherwise return null. This only works for reducible cycles for
+  /// now.
+  BlockT *getCyclePredecessor() const;
+
   /// Iteration over child cycles.
   //@{
   using const_child_iterator_base =
@@ -203,6 +218,7 @@
       }
     });
   }
+  //@}
 };
 
 /// \brief Cycle information for a function.
@@ -238,6 +254,7 @@
   const ContextT &getSSAContext() const { return Context; }
 
   CycleT *getCycle(const BlockT *Block) const;
+  unsigned getCycleDepth(const BlockT *Block) const;
   CycleT *getTopLevelParentCycle(const BlockT *Block) const;
 
   /// Move \p Child to \p NewParent by manipulating Children vectors.
diff --git a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
--- a/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineCycleAnalysis.h
@@ -16,15 +16,52 @@
 
 #include "llvm/ADT/GenericCycleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/InitializePasses.h"
 
 namespace llvm {
 
-extern template class GenericCycleInfo;
-extern template class GenericCycle;
-
 using MachineCycleInfo = GenericCycleInfo;
 using MachineCycle = MachineCycleInfo::CycleT;
 
+/// Legacy analysis pass which computes a \ref MachineCycleInfo.
+class MachineCycleInfoWrapperPass : public MachineFunctionPass {
+  MachineFunction *F = nullptr;
+  MachineCycleInfo CI;
+
+public:
+  static char ID;
+
+  MachineCycleInfoWrapperPass();
+
+  MachineCycleInfo &getCycleInfo() { return CI; }
+  const MachineCycleInfo &getCycleInfo() const { return CI; }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+  void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+  // TODO: verify analysis
+};
+
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+  static char ID;
+
+  MachineCycleInfoPrinterPass();
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+// TODO: add this function to MachineCycle template after implementing IR
+// version.
+/// Returns true if \p I is invariant with respect to \p Cycle: every virtual
+/// register it uses is defined outside the cycle and every physical register
+/// operand is safe to move (see the definition for the exact rules).
+bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H
diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h
--- a/llvm/include/llvm/CodeGen/MachineSSAContext.h
+++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h
@@ -28,6 +28,8 @@
 
 inline auto successors(MachineBasicBlock *BB) { return BB->successors(); }
 inline auto predecessors(MachineBasicBlock *BB) { return BB->predecessors(); }
+inline auto succ_size(const MachineBasicBlock *BB) { return BB->succ_size(); }
+inline auto pred_size(const MachineBasicBlock *BB) { return BB->pred_size(); }
 
 template <> class GenericSSAContext {
   const MachineRegisterInfo *RegInfo = nullptr;
diff --git a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
--- a/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -6,52 +6,17 @@
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineCycleAnalysis.h"
 #include "llvm/ADT/GenericCycleImpl.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineSSAContext.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
 template class llvm::GenericCycleInfo;
 template class llvm::GenericCycle;
 
-namespace {
-
-/// Legacy analysis pass which computes a \ref MachineCycleInfo.
-class MachineCycleInfoWrapperPass : public MachineFunctionPass {
-  MachineFunction *F = nullptr;
-  MachineCycleInfo CI;
-
-public:
-  static char ID;
-
-  MachineCycleInfoWrapperPass();
-
-  MachineCycleInfo &getCycleInfo() { return CI; }
-  const MachineCycleInfo &getCycleInfo() const { return CI; }
-
-  bool runOnMachineFunction(MachineFunction &F) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-  void releaseMemory() override;
-  void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
-  // TODO: verify analysis
-};
-
-class MachineCycleInfoPrinterPass : public MachineFunctionPass {
-public:
-  static char ID;
-
-  MachineCycleInfoPrinterPass();
-
-  bool runOnMachineFunction(MachineFunction &F) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-};
-
-} // namespace
-
 char MachineCycleInfoWrapperPass::ID = 0;
 
 MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
@@ -111,3 +76,75 @@
   CI.print(errs());
   return false;
 }
+
+/// Return true if \p I is invariant with respect to \p Cycle.
+///
+/// Every register operand is checked: a virtual register use must be defined
+/// outside the cycle, a physical register use must be constant, caller
+/// preserved or ignorable, and a physical register def must be dead and not
+/// live into any header of the cycle.
+bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
+  MachineFunction *MF = I.getParent()->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  const TargetSubtargetInfo &ST = MF->getSubtarget();
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+  const TargetInstrInfo *TII = ST.getInstrInfo();
+
+  // The instruction is cycle invariant if all of its operands are.
+  for (const MachineOperand &MO : I.operands()) {
+    if (!MO.isReg())
+      continue;
+
+    Register Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    // An instruction that uses or defines a physical register can't e.g. be
+    // hoisted, so mark this as not invariant.
+    if (Register::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        // However, if the physreg is known to always be caller saved/restored
+        // then this use is safe to hoist.
+        if (!MRI->isConstantPhysReg(Reg) &&
+            !TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MF) &&
+            !TII->isIgnorableUse(MO))
+          return false;
+        // Otherwise it's safe to move.
+        continue;
+      }
+
+      // A def that isn't dead can't be moved.
+      if (!MO.isDead())
+        return false;
+
+      // If the reg is live into any header of the cycle we can't hoist an
+      // instruction which would clobber it.
+      if (any_of(Cycle->getHeaders(), [&](const MachineBasicBlock *Block) {
+            return Block->isLiveIn(Reg);
+          }))
+        return false;
+
+      // A dead physreg def that clobbers nothing live constrains nothing.
+      continue;
+    }
+
+    if (!MO.isUse())
+      continue;
+
+    // Look the defining instruction up once; it feeds both the sanity check
+    // and the containment test below.
+    MachineInstr *DefMI = MRI->getVRegDef(Reg);
+    assert(DefMI && "Machine instr not mapped for this vreg?!");
+
+    // If the cycle contains the definition of an operand, then the instruction
+    // isn't cycle invariant.
+    if (Cycle->contains(DefMI->getParent()))
+      return false;
+  }
+
+  // If we got this far, the instruction is cycle invariant!
+  return true;
+}
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -119,7 +120,7 @@
     MachineRegisterInfo *MRI;      // Machine register information
     MachineDominatorTree *DT;      // Machine dominator tree
     MachinePostDominatorTree *PDT; // Machine post dominator tree
-    MachineLoopInfo *LI;
+    MachineCycleInfo *CI;
     MachineBlockFrequencyInfo *MBFI;
     const MachineBranchProbabilityInfo *MBPI;
     AliasAnalysis *AA;
@@ -180,8 +181,9 @@
       AU.addRequired();
       AU.addRequired();
       AU.addRequired();
-      AU.addRequired();
+      AU.addRequired();
       AU.addRequired();
+      AU.addPreserved();
       AU.addPreserved();
       if (UseBlockFreqInfo)
         AU.addRequired();
@@ -232,9 +234,9 @@
     MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
 
-    void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
+    void FindLoopSinkCandidates(MachineCycle *L, MachineBasicBlock *BB,
                                 SmallVectorImpl &Candidates);
-    bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+    bool SinkIntoLoop(MachineCycle *L, MachineInstr &I);
 
     bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                               MachineBasicBlock *MBB,
@@ -261,7 +263,7 @@
                       "Machine code sinking", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                     "Machine code sinking", false, false)
@@ -378,7 +380,8
@@ return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindLoopSinkCandidates( + MachineCycle *L, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); @@ -387,7 +390,7 @@ "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { + if (!isCycleInvariant(L, MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); continue; } @@ -425,22 +428,12 @@ MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); - LI = &getAnalysis(); + CI = &getAnalysis().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); - // MachineSink currently uses MachineLoopInfo, which only recognizes natural - // loops. As such, we could sink instructions into irreducible cycles, which - // would be non-profitable. - // WARNING: The current implementation of hasStoreBetween() is incorrect for - // sinking into irreducible cycles (PR53990), this bailout is currently - // necessary for correctness, not just profitability. - ReversePostOrderTraversal RPOT(&*MF.begin()); - if (containsIrreducibleCFG(RPOT, *LI)) - return false; - bool EverMadeChange = false; while (true) { @@ -474,9 +467,10 @@ } if (SinkInstsIntoLoop) { - SmallVector Loops(LI->begin(), LI->end()); + SmallVector Loops(CI->toplevel_begin(), + CI->toplevel_end()); for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + MachineBasicBlock *Preheader = L->getCyclePreheader(); if (!Preheader) { LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); continue; @@ -649,8 +643,9 @@ return false; // Check for backedges of more "complex" loops. 
- if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + if (CI->getCycle(FromBB) == CI->getCycle(ToBB) && CI->getCycle(FromBB) && + (!CI->getCycle(FromBB)->isReducible() || + CI->getCycle(ToBB)->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -755,7 +750,7 @@ // It is profitable to sink an instruction from a deeper loop to a shallower // loop, even if the latter post-dominates the former (PR21115). - if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. @@ -776,7 +771,7 @@ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - MachineLoop *ML = LI->getLoopFor(MBB); + MachineCycle *ML = CI->getCycle(MBB); // If the instruction is not inside a loop, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. @@ -826,12 +821,13 @@ return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); + MachineCycle *Cycle = CI->getCycle(DefMI->getParent()); // DefMI is defined outside of loop. There should be no live range // impact for this operand. Defination outside of loop means: // 1: defination is outside of loop. // 2: defination is in this loop, but it is a PHI in the loop header. - if (LI->getLoopFor(DefMI->getParent()) != ML || - (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + if (Cycle != ML || (DefMI->isPHI() && Cycle && Cycle->isReducible() && + Cycle->getHeader() == DefMI->getParent())) continue; // The DefMI is defined inside the loop. // If sinking this operand makes some register pressure set exceed limit, @@ -883,7 +879,7 @@ uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? 
LHSFreq < RHSFreq - : LI->getLoopDepth(L) < LI->getLoopDepth(R); + : CI->getCycleDepth(L) < CI->getCycleDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -1225,9 +1221,9 @@ /// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. -bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { +bool MachineSinking::SinkIntoLoop(MachineCycle *L, MachineInstr &I) { LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); + MachineBasicBlock *Preheader = L->getCyclePreheader(); assert(Preheader && "Loop sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; @@ -1235,7 +1231,7 @@ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { + if (!L->contains(MI.getParent())) { LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); CanSink = false; break; @@ -1408,7 +1404,9 @@ } // Don't sink instructions into a loop. 
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -132,6 +132,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -296,6 +296,7 @@ ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Machine Common Subexpression Elimination ; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Machine Cycle Info Analysis ; GCN-O1-NEXT: Machine code sinking ; GCN-O1-NEXT: Peephole Optimizations ; GCN-O1-NEXT: Remove dead machine instructions @@ -574,6 +575,7 @@ ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction +; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis ; GCN-O1-OPTS-NEXT: Machine code sinking ; GCN-O1-OPTS-NEXT: Peephole Optimizations ; GCN-O1-OPTS-NEXT: Remove dead machine instructions @@ -861,6 +863,7 @@ ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Machine Common Subexpression Elimination ; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Machine Cycle Info Analysis ; GCN-O2-NEXT: Machine code sinking ; 
GCN-O2-NEXT: Peephole Optimizations ; GCN-O2-NEXT: Remove dead machine instructions @@ -1161,6 +1164,7 @@ ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Machine Common Subexpression Elimination ; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Machine Cycle Info Analysis ; GCN-O3-NEXT: Machine code sinking ; GCN-O3-NEXT: Peephole Optimizations ; GCN-O3-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -91,6 +91,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -111,6 +111,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -87,6 +87,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git 
a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -107,6 +107,7 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Machine Common Subexpression Elimination ; CHECK-NEXT: MachinePostDominator Tree Construction +; CHECK-NEXT: Machine Cycle Info Analysis ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,14 +32,13 @@ ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_15: # %for.inc +; CHECK-NEXT: .LBB0_16: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_19 Depth 2 +; CHECK-NEXT: # Child Loop BB0_20 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -56,7 +55,7 @@ ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_9 +; CHECK-NEXT: jg .LBB0_8 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -65,10 +64,12 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl -; 
CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: jne .LBB0_16 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -77,82 +78,82 @@ ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload -; CHECK-NEXT: jmp .LBB0_6 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_9: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_10 -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movl %edi, %esi -; CHECK-NEXT: movl $0, %edi -; CHECK-NEXT: movb %cl, %dl -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.7: # %af +; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: .LBB0_11: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_8 -; CHECK-NEXT: .LBB0_16: # %if.end39 +; CHECK-NEXT: jne .LBB0_12 +; CHECK-NEXT: .LBB0_17: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_18 -; CHECK-NEXT: # %bb.17: # %if.then41 +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.18: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_18: # %for.end46 +; CHECK-NEXT: .LBB0_19: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_20 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_8: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp 
.LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_19: # %for.cond47 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movb %dl, %dh +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: # %bb.20: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: # %bb.21: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_19 -; CHECK-NEXT: .LBB0_10: # %ae +; CHECK-NEXT: jne .LBB0_20 +; CHECK-NEXT: .LBB0_9: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: # %bb.12: # %if.end26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_15 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: # %bb.14: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_15 -; CHECK-NEXT: # %bb.14: # %if.then31 +; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: # %bb.15: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_15 +; CHECK-NEXT: jmp .LBB0_16 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: .LBB0_8: # in 
Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_17 +; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl +; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: jmp .LBB0_7 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,8 +1377,6 @@ ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1388,20 +1386,24 @@ ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_4 -; ENABLE-NEXT: ## %bb.3: ## %for.body4.i +; ENABLE-NEXT: je LBB16_3 +; ENABLE-NEXT: ## %bb.4: ## %for.body4.i +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something +; ENABLE-NEXT: jmp LBB16_5 +; ENABLE-NEXT: LBB16_3: +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_4: ## %for.inc +; ENABLE-NEXT: LBB16_5: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_4 -; ENABLE-NEXT: ## %bb.5: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_5 +; ENABLE-NEXT: ## %bb.6: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ 
-1418,8 +1420,6 @@ ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,20 +1429,24 @@ ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_4 -; DISABLE-NEXT: ## %bb.3: ## %for.body4.i +; DISABLE-NEXT: je LBB16_3 +; DISABLE-NEXT: ## %bb.4: ## %for.body4.i +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something +; DISABLE-NEXT: jmp LBB16_5 +; DISABLE-NEXT: LBB16_3: +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_4: ## %for.inc +; DISABLE-NEXT: LBB16_5: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_4 -; DISABLE-NEXT: ## %bb.5: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_5 +; DISABLE-NEXT: ## %bb.6: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx