Index: include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- include/llvm/CodeGen/MachineFrameInfo.h +++ include/llvm/CodeGen/MachineFrameInfo.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/DataTypes.h" #include #include @@ -25,7 +26,6 @@ class TargetRegisterClass; class Type; class MachineFunction; -class MachineBasicBlock; class TargetFrameLowering; class TargetMachine; class BitVector; @@ -272,12 +272,36 @@ /// stack objects like arguments so we can't treat them as immutable. bool HasTailCall = false; + // FIXME: ShrinkWrap2: Deprecate. /// Not null, if shrink-wrapping found a better place for the prologue. MachineBasicBlock *Save = nullptr; /// Not null, if shrink-wrapping found a better place for the epilogue. MachineBasicBlock *Restore = nullptr; public: + /// Map a set of registers to a basic block. This is a replacement for CSInfo + /// with extra information about the location of the saves / restores pinned + /// to a basic block. One register may appear more than once in the map, as + /// long as it is associated to a different basic block. The CSIs may share + /// frame indexes for different registers, for different basic blocks. + /// Similar to CSInfo, the frame indexes in the CalleeSavedInfo struct are + /// valid ony if CSIValid is true. + typedef DenseMap> + CalleeSavedMap; + +private: + /// If any, contains better save points for the prologue found by + /// shrink-wrapping. + CalleeSavedMap Saves; + /// If any, contains better restore points for the epilogue found by + /// shrink-wrapping. + CalleeSavedMap Restores; + /// Should the PrologEpilogInserter and the various target hooks use the + /// information gathered from shrink-wrapping? + // FIXME: ShrinkWrap2: Fix name. + bool ShouldUseShrinkWrap2 = false; + +public: explicit MachineFrameInfo(unsigned StackAlignment, bool StackRealignable, bool ForcedRealign) : StackAlignment(StackAlignment), StackRealignable(StackRealignable), @@ -647,10 +671,43 @@ void setCalleeSavedInfoValid(bool v) { CSIValid = v; } + // FIXME: ShrinkWrap2: Merge with multiple points. MachineBasicBlock *getSavePoint() const { return Save; } - void setSavePoint(MachineBasicBlock *NewSave) { Save = NewSave; } + void setSavePoint(MachineBasicBlock *NewSave) { + assert(Saves.empty() && Restores.empty() && + "Mixing shrink-wrapping results."); + Save = NewSave; + } MachineBasicBlock *getRestorePoint() const { return Restore; } - void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; } + void setRestorePoint(MachineBasicBlock *NewRestore) { + assert(Saves.empty() && Restores.empty() && + "Mixing shrink-wrapping results."); + Restore = NewRestore; + } + + // FIXME: ShrinkWrap2: Merge with old shrink-wrapping. + // FIXME: ShrinkWrap2: Provide setSaves / setRestores instead of non-const ref + // to the map? + CalleeSavedMap &getSaves() { + assert(!Save && !Restore && "Mixing shrink-wrapping results."); + return Saves; + } + CalleeSavedMap &getRestores() { + assert(!Save && !Restore && "Mixing shrink-wrapping results."); + return Restores; + } + const CalleeSavedMap &getSaves() const { + assert(!Save && !Restore && "Mixing shrink-wrapping results."); + return Saves; + } + const CalleeSavedMap &getRestores() const { + assert(!Save && !Restore && "Mixing shrink-wrapping results."); + return Restores; + } + // FIXME: ShrinkWrap2: Name. + bool getShouldUseShrinkWrap2() const { return ShouldUseShrinkWrap2; } + // FIXME: ShrinkWrap2: Name. + void setShouldUseShrinkWrap2(bool New) { ShouldUseShrinkWrap2 = New; } /// Return a set of physical registers that are pristine. /// Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -128,6 +128,8 @@ /// ShrinkWrap pass. Look for the best place to insert save and restore // instruction and update the MachineFunctionInfo with that information. extern char &ShrinkWrapID; + // FIXME: ShrinkWrap2: Merge. + extern char &ShrinkWrap2ID; /// Greedy register allocator. extern char &RAGreedyID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -328,6 +328,8 @@ void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); +// FIXME: ShrinkWrap2: Merge. +void initializeShrinkWrap2Pass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingLegacyPassPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -123,6 +123,8 @@ ScoreboardHazardRecognizer.cpp ShadowStackGCLowering.cpp ShrinkWrap.cpp +# FIXME: ShrinkWrap2: Merge. + ShrinkWrap2.cpp SjLjEHPrepare.cpp SlotIndexes.cpp SpillPlacement.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -81,6 +81,8 @@ initializeRenameIndependentSubregsPass(Registry); initializeSafeStackPass(Registry); initializeShrinkWrapPass(Registry); + // FIXME: ShrinkWrap2: Merge. + initializeShrinkWrap2Pass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -107,6 +107,7 @@ unsigned MinCSFrameIndex = std::numeric_limits::max(); unsigned MaxCSFrameIndex = 0; + // FIXME: ShrinkWrap2: Merge the shrink-wrapping logic here. // Save and Restore blocks of the current function. Typically there is a // single save block, unless Windows EH funclets are involved. MBBVector SaveBlocks; @@ -237,8 +238,14 @@ delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); - MFI.setSavePoint(nullptr); - MFI.setRestorePoint(nullptr); + if (!MFI.getShouldUseShrinkWrap2()) + MFI.setSavePoint(nullptr); + else + MFI.getSaves().clear(); + if (!MFI.getShouldUseShrinkWrap2()) + MFI.setRestorePoint(nullptr); + else + MFI.getRestores().clear(); return true; } @@ -307,6 +314,8 @@ // Use the points found by shrink-wrapping, if any. if (MFI.getSavePoint()) { + // FIXME: ShrinkWrap2: Remove check. + assert(!MFI.getShouldUseShrinkWrap2() && "Mixing shrink-wrapping passes."); SaveBlocks.push_back(MFI.getSavePoint()); assert(MFI.getRestorePoint() && "Both restore and save must be set"); MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); @@ -328,6 +337,63 @@ } } +/// Insert code that saves the callee saved registers used in the basic block. +static void insertCSRSaves(MachineBasicBlock &SaveBB, + ArrayRef CSIs) { + MachineFunction &Fn = *SaveBB.getParent(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); + + assert(!CSIs.empty() && "No saves to insert."); + + MachineBasicBlock::iterator I = SaveBB.begin(); + // FIXME: ShrinkWrap2: Spill using target interface when X86's target stops + // inserting push / pop operations everywhere. + // if (!TFI->spillCalleeSavedRegisters(SaveBB, I, CSI, &TRI)) { + for (const CalleeSavedInfo &CSI : CSIs) { + // Insert the spill to the stack frame. + unsigned Reg = CSI.getReg(); + + // Update liveness. + if (!Fn.getRegInfo().isLiveIn(Reg)) + SaveBB.addLiveIn(Reg); + + // FIXME: ShrinkWrap2: Check if can be killed. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(SaveBB, I, Reg, false, CSI.getFrameIdx(), RC, &TRI); + } + // } +} + +/// Insert code that restores the callee saved registers used in the basic +/// block. +static void insertCSRRestores(MachineBasicBlock &RestoreBB, + ArrayRef CSIs) { + MachineFunction &Fn = *RestoreBB.getParent(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); + + assert(!CSIs.empty() && "No restores to insert."); + + // Restore using target interface. + MachineBasicBlock::iterator I = RestoreBB.getFirstTerminator(); + + // Restore all registers immediately before the return and any terminators + // that precede it. + // FIXME: ShrinkWrap2: Spill using target interface when X86's target stops + // inserting push / pop operations everywhere. + // if (!TFI->restoreCalleeSavedRegisters(Restore, I, CSI, TRI)) { + for (int i = CSIs.size() - 1; i >= 0; --i) { + unsigned Reg = CSIs[i].getReg(); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(RestoreBB, I, Reg, CSIs[i].getFrameIdx(), RC, + &TRI); + assert(I != RestoreBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + } + // } +} + static void assignCalleeSavedSpillSlots(MachineFunction &F, const BitVector &SavedRegs, unsigned &MinCSFrameIndex, @@ -533,6 +599,123 @@ } } +// FIXME: ShrinkWrap2: Name. +static void doSpillCalleeSavedRegsShrinkWrap2(MachineFunction &Fn, + RegScavenger *RS, + unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks) { + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); + MachineFrameInfo::CalleeSavedMap &Saves = MFI.getSaves(); + MachineFrameInfo::CalleeSavedMap &Restores = MFI.getRestores(); + + BitVector SavedRegs; + // FIXME: ShrinkWrap2: Also have to call TFI->determineCalleeSaves, since it + // can have side-effects on internal state of a target (i.e. + // AArch64FunctionInfo) + TFI->determineCalleeSaves(Fn, SavedRegs, RS); + + // Now gather the callee-saved registers we found using shrink-wrapping. + // FIXME: ShrinkWrap2: We already gathered all the CSRs in ShrinkWrap. Reuse + // somehow? + BitVector ShrinkWrapSavedRegs{SavedRegs.size()}; + for (auto &Save : Saves) + for (const CalleeSavedInfo &CSI : Save.second) + ShrinkWrapSavedRegs.set(CSI.getReg()); + + // Gather the CSR that were found by the target hook, but not by the + // shrink-wrapping pass. + BitVector TargetAdded = ShrinkWrapSavedRegs; + TargetAdded.flip(); + TargetAdded &= SavedRegs; + + // Get all the return blocks. + BitVector ReturnBlocks{Fn.getNumBlockIDs()}; + for (MachineBasicBlock &MBB : Fn) + if (MBB.isReturnBlock()) + ReturnBlocks.set(MBB.getNumber()); + + auto EmplaceIfNotPresent = [&](std::vector &CSIs, + unsigned Reg) { + if (std::find_if(CSIs.begin(), CSIs.end(), [&](CalleeSavedInfo &CSI) { + return CSI.getReg() == Reg; + }) == CSIs.end()) + CSIs.emplace_back(Reg); + }; + + // For all the registers that were not found by shrink-wrapping, save them at + // the entry block, and restore them at all the return blocks. + for (int Reg = TargetAdded.find_first(); Reg > 0; + Reg = TargetAdded.find_next(Reg)) { + EmplaceIfNotPresent(Saves[&Fn.front()], Reg); + for (int MBBNum = ReturnBlocks.find_first(); MBBNum >= 0; + MBBNum = ReturnBlocks.find_next(MBBNum)) + EmplaceIfNotPresent(Restores[Fn.getBlockNumbered(MBBNum)], Reg); + } + + // FIXME: ShrinkWrap2: Re-use stack slots. What about target-specific hooks? + assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); + + MFI.setCalleeSavedInfoValid(true); + + if (Fn.getFunction()->hasFnAttribute(Attribute::Naked)) + return; + + // FIXME: ShrinkWrap2: This is awful. We first call + // assignCalleeSavedSpillSlots, that fills MFI.CalleeSavedInfo which is used + // for the ENTIRE function. Then, we need to reassign the FrameIdx back to the + // Saves / Restores map. + SmallVector *, unsigned>, 2> ToRemove; + const std::vector &CSIs = MFI.getCalleeSavedInfo(); + for (auto *Map : {&Saves, &Restores}) { + for (auto &Elt : *Map) { + for (const CalleeSavedInfo &CSI : Elt.second) { + unsigned Reg = CSI.getReg(); + // Look for the register in the assigned CSIs, and reassign it in the + // map. + auto It = std::find_if(CSIs.begin(), CSIs.end(), + [&](const CalleeSavedInfo &NewCSI) { + return NewCSI.getReg() == Reg; + }); + if (It != CSIs.end()) + // FIXME: ShrinkWrap2: const_cast... + const_cast(CSI).setFrameIdx(It->getFrameIdx()); + else // Also, if we can't find it in the list, it means the target + // removed it. x86 does this for FP, since the spill is part of the + // prologue emission. + ToRemove.emplace_back(&Elt.second, Reg); + } + } + } + for (auto& Pair : ToRemove) { + std::vector &V = *Pair.first; + unsigned Reg = Pair.second; + V.erase(std::remove_if(V.begin(), V.end(), + [&](const CalleeSavedInfo &CSI) { + return CSI.getReg() == Reg; + }), + V.end()); + } + + for (auto &Save : Saves) { + if (Save.second.empty()) // FIXME: Find a way to avoid empty sets. This can + // happen after the target decides to remove CSR. + continue; + + insertCSRSaves(*Save.first, Save.second); + // FIXME: ShrinkWrap2: Update liveness only after all spills / restores? + updateLiveness(Fn); + } + + for (auto &Restore : Restores) + if (!Restore.second.empty()) // FIXME: Find a way to avoid empty sets. This + // can happen after the target decides to + // remove CSR. + insertCSRRestores(*Restore.first, Restore.second); +} + static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex, @@ -540,9 +723,16 @@ const MBBVector &RestoreBlocks) { const Function *F = Fn.getFunction(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + MachineFrameInfo &MFI = Fn.getFrameInfo(); + MinCSFrameIndex = std::numeric_limits::max(); MaxCSFrameIndex = 0; + // FIXME: ShrinkWrap2: Share code somehow. + if (MFI.getShouldUseShrinkWrap2()) + return doSpillCalleeSavedRegsShrinkWrap2( + Fn, RS, MinCSFrameIndex, MaxCSFrameIndex, SaveBlocks, RestoreBlocks); + // Determine which of the registers in the callee save list should be saved. BitVector SavedRegs; TFI->determineCalleeSaves(Fn, SavedRegs, RS); @@ -974,6 +1164,11 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + // FIXME: ShrinkWrap2: Stack alginment / adjustment / etc. go in emitPrologue. + // For now, we add these at the entry / exit of the function, and we spill + // callee saves using our own blocks. There should be a way to shrink-wrap the + // stack operations as well. + // Add prologue to the function... for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.emitPrologue(Fn, *SaveBlock); @@ -982,9 +1177,11 @@ for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(Fn, *RestoreBlock); + // FIXME: ShrinkWrap2: Will this still work? for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.inlineStackProbe(Fn, *SaveBlock); + // FIXME: ShrinkWrap2: Will this still work? // Emit additional code that is required to support segmented stacks, if // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack @@ -994,6 +1191,7 @@ TFI.adjustForSegmentedStacks(Fn, *SaveBlock); } + // FIXME: ShrinkWrap2: Will this still work? // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The // approach is rather similar to that of Segmented Stacks, but it uses a Index: lib/CodeGen/ShrinkWrap2.cpp =================================================================== --- /dev/null +++ lib/CodeGen/ShrinkWrap2.cpp @@ -0,0 +1,1213 @@ +//===-- ShrinkWrap2.cpp - Compute safe point for prolog/epilog insertion --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass is an improvement of the current shrink-wrapping pass, based, this +// time, on the dataflow analysis described in "Minimizing Register Usage +// Penalty at Procedure Calls - Fred C. Chow" [1]. The aim of this improvement +// is to remove the restriction that the current shrink-wrapping pass is having, +// which is having only one save / restore point for all the registers and the +// stack adjustment. +// FIXME: ShrinkWrap2: Random thoughts: +// - r193749 removed an old pass that was an implementation of [1]. +// - Cost model: use MachineBlockFrequency and some instruction cost model? +// - Split critical edges on demand? +// - Eliminate trivial cases where most of the CSRs are used in the entry block? +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" + +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" + +// FIXME: ShrinkWrap2: Fix name. +#define DEBUG_TYPE "shrink-wrap2" + +#define VERBOSE_DEBUG(X) \ + do { \ + if (VerboseDebug) \ + DEBUG(X); \ + } while (0); + +using namespace llvm; + +// FIXME: ShrinkWrap2: Fix name. +static cl::opt + EnableShrinkWrap2Opt("enable-shrink-wrap2", cl::Hidden, + cl::desc("enable the shrink-wrapping 2 pass")); + +static cl::opt + VerboseDebug("shrink-wrap-verbose", cl::Hidden, + cl::desc("verbose debug output")); + +// FIXME: ShrinkWrap2: Remove, debug. +static cl::opt ViewCFGDebug("shrink-wrap-view", cl::Hidden, + cl::desc("view cfg")); + +namespace { + +/// Iterator for successors / predecessors. This is here to work with +/// SmallVector and std::vector at the same time. +typedef const MachineBasicBlock *const *MBBIterator; + +// FIXME: ShrinkWrap2: Fix name. +class ShrinkWrap2 : public MachineFunctionPass { + typedef BitVector BBSet; + typedef BitVector RegSet; + // Idx = MBB.getNumber() + typedef SmallVector BBRegSetMap; + typedef DenseMap SparseBBRegSetMap; + + /// Store callee-saved-altering basic blocks. + SparseBBRegSetMap UsedCSR; + + /// All the CSR used in the function. This is used for quicker iteration over + /// the registers. + /// FIXME: ShrinkWrap2: PEI needs this, instead of calling + /// TII->determineCalleeSaves. + RegSet Regs; + RegSet TargetAddedRegs; + RegSet TargetRemovedRegs; + BBSet NoReturnBlocks; + + // FIXME: ShrinkWrap2: Explain anticipated / available and how the + // properties are used. + struct SWAttributes { + /// Is the register anticipated at the end of this basic block? + RegSet ANTOUT; + /// Is the register anticipated at the beginning of this basic block? + RegSet ANTIN; + /// Is the register available at the beginning of this basic block? + RegSet AVIN; + /// Is the register available at the end of this basic block? + RegSet AVOUT; + + SWAttributes(const MachineFunction &MF) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned NumRegs = TRI.getNumRegs(); + for (RegSet *Regs : {&ANTOUT, &ANTIN, &AVIN, &AVOUT}) + (*Regs).resize(NumRegs); + } + }; + + /// An SCC that was discovered through the scc_iterator on the function. This + /// is used in order to detect loops, reducible *AND* irreducible. + struct SCCLoop { + /// The successors of the SCC. There are blocks outside the SCC. + SetVector> + Successors; + iterator_range successors() const { + return {&*Successors.begin(), &*Successors.end()}; + } + /// The predecessors of the SCC. There are blocks outside the SCC. + SetVector> + Predecessors; + iterator_range predecessors() const { + return {&*Predecessors.begin(), &*Predecessors.end()}; + } + /// The SCC number. This number is the smallest basic block number int he + /// SCC. This is used when we replace basic blocks with SCCs for loops. + unsigned Number; + unsigned getNumber() const { return Number; } + /// The number of blocks the SCC contains. + unsigned Size; + unsigned getSize() const { return Size; } + }; + + /// Wrapper around scc_iterator that collects SCCs that are loops, computes + /// their successor / predecessor and assigns an unique number based on the + /// basic blocks it contains. + struct SCCLoopInfo { + /// Own the SCCs. + SmallVector SCCs; + /// Map a basic block number to an SCCLoop number. The SCCLoop number is the + /// position in the `SCCs` vector, and it is differrent from the SCC::Number + /// attribute, which is the smallest basic block number in the SCC. + DenseMap MBBToSCC; + + /// Get the SCCLoop for a designated basic block number. If there is no + /// SCCLoop associated, return `nullptr`. + SCCLoop *getSCCLoopFor(unsigned MBBNum) { + auto It = MBBToSCC.find(MBBNum); + if (It == MBBToSCC.end()) + return nullptr; + return &SCCs[It->second]; + } + const SCCLoop *getSCCLoopFor(unsigned MBBNum) const { + return const_cast(this)->getSCCLoopFor(MBBNum); + } + void clear() { + SCCs.clear(); + MBBToSCC.clear(); + } + /// Initialize the successors / predecessors of the SCCLoops. + void init(const MachineFunction &MF); + }; + + /// The replacement for the MachineLoopInfo, that takes care of irreducible + /// loops as well. + SCCLoopInfo SI; + + typedef SmallVector AttributeMap; + + /// Final results. + SparseBBRegSetMap Saves; + SparseBBRegSetMap Restores; + + /// Emit remarks. + MachineOptimizationRemarkEmitter *ORE; + + /// Get the block number or the SCCLoop's number. + unsigned blockNumber(unsigned MBBNum) const; + + /// Get the block successors or the SCCLoop exit blocks. + iterator_range blockSuccessors(const MachineFunction &MF, + unsigned MBBNum) const; + + /// Get the block predecessors or the SCCLoop's predecessors. + iterator_range blockPredecessors(const MachineFunction &MF, + unsigned MBBNum) const; + + /// Populate the attribute maps with trivial properties from the used + /// registers. + void populateAttributes(const MachineFunction &MF, AttributeMap &Attrs) const; + /// Compute the attributes for one register. + // FIXME: ShrinkWrap2: Don't do this per register. + void computeAttributes(unsigned Reg, const MachineFunction &MF, + AttributeMap &Attrs) const; + /// Save the results for this particular register. + // FIXME: ShrinkWrap2: Don't do this per register. + bool gatherAttributesResults(unsigned Reg, const MachineFunction &MF, + AttributeMap &Attrs); + /// Dump the contents of the attributes. + // FIXME: ShrinkWrap2: Don't do this per register. + void dumpAttributes(unsigned Reg, const MachineFunction &MF, + AttributeMap &Attrs) const; + + /// Does the function satisfy the requirements for shrink-wrapping? + bool shouldShrinkWrap(const MachineFunction &MF); + + /// Determine all the calee saved register used in this function. + /// This fills out the Regs set, containing all the CSR used in the entire + /// function, and fills the UsedCSR map, containing all the CSR used per + /// basic block. + /// We don't use the TargetFrameLowering::determineCalleeSaves function + /// because we need to associate each usage of a CSR to the corresponding + /// basic block. + // FIXME: ShrinkWrap2: Target hook might add other callee saves. + // FIXME: ShrinkWrap2: Should we add the registers added by the target in + // the + // entry / exit block(s) ? + void determineCalleeSaves(MachineFunction &MF); + /// Remove uses and fill NoReturnBlocks with the blocks that we know are not + /// going to return from the function. + void removeUsesOnNoReturnPaths(MachineFunction &MF); + void dumpUsedCSR(const MachineFunction &MF) const; + + /// This algorithm relies on the fact that there are no critical edges. + // FIXME: ShrinkWrap2: Get rid of this. + bool splitCriticalEdges(MachineFunction &MF); + + /// Mark all the basic blocks around the loop (pred, succ) as used, + /// if there is an usage of a CSR inside a loop. We want to avoid any save / + /// restore operations in a loop. + void markUsesOutsideLoops(MachineFunction &MF); + + /// * Verify if the results are better than obvious results, like: + /// * CSR used in a single MBB: only one save and one restore. + /// * Remove empty entries from the Saves / Restores maps. + // FIXME: ShrinkWrap2: This shouldn't happen, we better fix the algorithm + // first. + void postProcessResults(MachineFunction &MF, + const SparseBBRegSetMap &UsedCSR); + + /// Return the save / restore points to MachineFrameInfo, to be used by PEI. + void returnToMachineFrame(MachineFunction &MF); + + /// Verify save / restore points by walking the CFG. + /// This asserts if anything went wrong. + // FIXME: ShrinkWrap2: Should we add a special flag for this? + // FIXME: ShrinkWrap2: Expensive checks? + void verifySavesRestores(MachineFunction &MF) const; + + /// Dump the final shrink-wrapping results. + void dumpResults(MachineFunction &MF) const; + + /// \brief Initialize the pass for \p MF. + void init(MachineFunction &MF) { SI.init(MF); } + + /// Clear the function's state. + void clear() { + UsedCSR.clear(); + + Regs.clear(); + TargetAddedRegs.clear(); + TargetRemovedRegs.clear(); + NoReturnBlocks.clear(); + + SI.clear(); + Saves.clear(); + Restores.clear(); + } + + // FIXME: ShrinkWrap2: releaseMemory? + +public: + static char ID; + + ShrinkWrap2() : MachineFunctionPass(ID) { + initializeShrinkWrap2Pass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Shrink Wrapping analysis"; } + + /// \brief Perform the shrink-wrapping analysis and update + /// the MachineFrameInfo attached to \p MF with the results. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char ShrinkWrap2::ID = 0; +char &llvm::ShrinkWrap2ID = ShrinkWrap2::ID; + +// FIXME: ShrinkWrap2: Fix name. +INITIALIZE_PASS_BEGIN(ShrinkWrap2, "shrink-wrap2", "Shrink Wrap Pass", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) +// FIXME: ShrinkWrap2: Fix name. +INITIALIZE_PASS_END(ShrinkWrap2, "shrink-wrap2", "Shrink Wrap Pass", false, + false) + +void ShrinkWrap2::SCCLoopInfo::init(const MachineFunction &MF) { + // Create the SCCLoops. + for (auto I = scc_begin(&MF); !I.isAtEnd(); ++I) { + // Skip non-loop SCCs. + if (!I.hasLoop()) + continue; + + SCCs.emplace_back(); + // The SCCLoop number is the smallest basic block number in the SCC. + unsigned Number = + (*std::min_element( + I->begin(), I->end(), + [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { + return A->getNumber() < B->getNumber(); + })) + ->getNumber(); + SCCs.back().Number = Number; + SCCs.back().Size = I->size(); + + // The number used in MBBToSCC is the position of the SCC in `SCCs` + for (const MachineBasicBlock *MBB : *I) + MBBToSCC[MBB->getNumber()] = SCCs.size() - 1; + } + + // Compute successors / predecessors of the SCCLoops. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineBasicBlock *Succ : MBB.successors()) { + SCCLoop *MBBSCC = getSCCLoopFor(MBB.getNumber()); + SCCLoop *SuccSCC = getSCCLoopFor(Succ->getNumber()); + // The successor is a loop, but not the current block. It means the + // successor's predecessor is the current block. + if (!MBBSCC && SuccSCC) + SuccSCC->Predecessors.insert(&MBB); + // The successor is not a loop, but the current block is one. It means + // that the loop's successor is the block's successor. + else if (MBBSCC && !SuccSCC) + MBBSCC->Successors.insert(Succ); + // The successor and the block are loops. We now need to connect SCCs + // together. + else if (MBBSCC && SuccSCC && MBBSCC != SuccSCC) { + MBBSCC->Successors.insert(Succ); + SuccSCC->Predecessors.insert(&MBB); + } + } + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + SCCLoop *MBBSCC = getSCCLoopFor(MBB.getNumber()); + SCCLoop *PredSCC = getSCCLoopFor(Pred->getNumber()); + // The predecessor is a loop, but not the current block. It means the + // predecessor's successor is the current block. + if (!MBBSCC && PredSCC) + PredSCC->Successors.insert(&MBB); + // The predecessor is not a loop, but the current block is one. It + // means that the loop's predecessor is the block's predecessor. + else if (MBBSCC && !PredSCC) + MBBSCC->Predecessors.insert(Pred); + // The successor and the block are loops. We now need to connect SCCs + // together. + else if (MBBSCC && PredSCC && MBBSCC != PredSCC) { + MBBSCC->Predecessors.insert(Pred); + PredSCC->Successors.insert(&MBB); + } + } + } +} + +unsigned ShrinkWrap2::blockNumber(unsigned MBBNum) const { + if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum)) + return C->getNumber(); + return MBBNum; +} + +iterator_range +ShrinkWrap2::blockSuccessors(const MachineFunction &MF, unsigned MBBNum) const { + if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum)) + return {C->Successors.begin(), C->Successors.end()}; + const MachineBasicBlock *MBB = MF.getBlockNumbered(MBBNum); + return {&*MBB->succ_begin(), &*MBB->succ_end()}; +} + +iterator_range +ShrinkWrap2::blockPredecessors(const MachineFunction &MF, + unsigned MBBNum) const { + if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum)) + return {C->Predecessors.begin(), C->Predecessors.end()}; + const MachineBasicBlock *MBB = MF.getBlockNumbered(MBBNum); + return {&*MBB->pred_begin(), &*MBB->pred_end()}; +} + +void ShrinkWrap2::populateAttributes(const MachineFunction &MF, + AttributeMap &Attrs) const { + // Reserve + emplace_back to avoid copies of empty bitsets.. + Attrs.reserve(MF.getNumBlockIDs()); + for (unsigned i = 0; i < MF.getNumBlockIDs(); ++i) + Attrs.emplace_back(MF); + + for (auto &KV : UsedCSR) { + unsigned MBBNum = KV.first; + const RegSet &Regs = KV.second; + // Setting APP also affects ANTIN and AVOUT. + // ANTIN = APP || ANTOUT + Attrs[MBBNum].ANTIN |= Regs; + // AVOUT = APP || AVIN + Attrs[MBBNum].AVOUT |= Regs; + } +} + +void ShrinkWrap2::computeAttributes(unsigned Reg, const MachineFunction &MF, + AttributeMap &Attrs) const { + // FIXME: ShrinkWrap2: Just set. + auto AssignIfDifferent = [&](RegSet &Regs, bool New) { + if (Regs.test(Reg) != New) { + Regs.flip(Reg); + } + }; + auto UsesReg = [&](unsigned MBBNum) { + auto Found = UsedCSR.find(MBBNum); + if (Found == UsedCSR.end()) + return false; + return Found->second.test(Reg); + }; + + DenseMap SCCVisited; + + for (const MachineBasicBlock *MBB : post_order(&MF)) { + unsigned MBBNum = MBB->getNumber(); + if (const SCCLoop *C = SI.getSCCLoopFor(MBB->getNumber())) { + if (++SCCVisited[C] != C->getSize()) + continue; + else + MBBNum = C->getNumber(); + } + + SWAttributes &Attr = Attrs[MBBNum]; + // If there is an use of this register on *all* the paths starting from + // this basic block, the register is anticipated at the end of this + // block + // (propagate the IN attribute of successors to possibly merge saves) + // - + // | *false* if no successor. + // ANTOUT = | + // | && ANTIN(succ[i]) otherwise. + // - + RegSet &ANTOUTb = Attr.ANTOUT; + auto Successors = blockSuccessors(MF, MBB->getNumber()); + if (Successors.begin() == Successors.end()) + AssignIfDifferent(ANTOUTb, false); + else { + bool A = all_of(Successors, [&](const MachineBasicBlock *S) { + if (S == MBB) // Ignore self. + return true; + return Attrs[blockNumber(S->getNumber())].ANTIN.test(Reg); + }); + AssignIfDifferent(ANTOUTb, A); + } + + // If the register is used in the block, or if it is anticipated in all + // successors it is also anticipated at the beginning, since we consider + // entire blocks. + // - + // ANTIN = | APP || ANTOUT + // - + RegSet &ANTINb = Attr.ANTIN; + bool NewANTIN = UsesReg(MBBNum) || Attr.ANTOUT.test(Reg); + AssignIfDifferent(ANTINb, NewANTIN); + } + + SCCVisited.clear(); + + ReversePostOrderTraversal RPOT(&MF); + for (const MachineBasicBlock *MBB : RPOT) { + unsigned MBBNum = MBB->getNumber(); + if (const SCCLoop *C = SI.getSCCLoopFor(MBB->getNumber())) { + if (++SCCVisited[C] != 1) + continue; + else + MBBNum = C->getNumber(); + } + + SWAttributes &Attr = Attrs[MBBNum]; + // If there is an use of this register on *all* the paths arriving in + // this + // block, then the register is available in this block (propagate the + // out + // attribute of predecessors to possibly merge restores). + // - + // | *false* if no predecessor. + // AVIN = | + // | && AVOUT(pred[i]) otherwise. + // - + RegSet &AVINb = Attr.AVIN; + auto Predecessors = blockPredecessors(MF, MBB->getNumber()); + if (Predecessors.begin() == Predecessors.end()) + AssignIfDifferent(AVINb, false); + else { + bool A = all_of(Predecessors, [&](const MachineBasicBlock *P) { + if (P == MBB) // Ignore self. + return true; + return Attrs[blockNumber(P->getNumber())].AVOUT.test(Reg); + }); + AssignIfDifferent(AVINb, A); + } + + // If the register is used in the block, or if it is always available in + // all predecessors , it is also available on exit, since we consider + // entire blocks. + // - + // AVOUT = | APP || AVIN + // - + RegSet &AVOUTb = Attr.AVOUT; + bool NewAVOUT = UsesReg(MBBNum) || Attr.AVIN.test(Reg); + AssignIfDifferent(AVOUTb, NewAVOUT); + } + + VERBOSE_DEBUG(dumpAttributes(Reg, MF, Attrs)); +} + +bool ShrinkWrap2::gatherAttributesResults(unsigned Reg, + const MachineFunction &MF, + AttributeMap &Attrs) { + for (const MachineBasicBlock &MBB : MF) { + bool IsSCCLoop = false; + if (const SCCLoop *C = SI.getSCCLoopFor(MBB.getNumber())) { + if (static_cast(MBB.getNumber()) != C->getNumber()) + continue; + else + IsSCCLoop = true; + } + + unsigned MBBNum = blockNumber(MBB.getNumber()); + if (NoReturnBlocks.test(MBBNum)) + continue; + SWAttributes &Attr = Attrs[MBBNum]; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + bool Change = false; + // Check if this block is ANTIN and has an incoming critical edge where it + // is not ANTIN. + if (Attr.ANTIN.test(Reg)) { + auto Preds = blockPredecessors(MF, MBBNum); + if (std::distance(Preds.begin(), Preds.end()) >= 2 || IsSCCLoop) { + for (const MachineBasicBlock *P : Preds) { + unsigned PredNum = blockNumber(P->getNumber()); + SWAttributes &Attr = Attrs[PredNum]; + RegSet &ANTINp = Attr.ANTIN; + if (!ANTINp.test(Reg)) { + VERBOSE_DEBUG(dbgs() << "Incoming critical edge in " << MBBNum + << ".\n"); + ANTINp.set(Reg); + Attr.AVOUT.set(Reg); + RegSet &Used = UsedCSR[PredNum]; + if (Used.empty()) + Used.resize(TRI.getNumRegs()); + Used.set(Reg); + Change = true; + } + } + } + } + if (Attr.AVOUT.test(Reg)) { + auto Succs = blockSuccessors(MF, MBBNum); + if (std::distance(Succs.begin(), Succs.end()) >= 2 || IsSCCLoop) { + for (const MachineBasicBlock *S : Succs) { + unsigned SuccNum = blockNumber(S->getNumber()); + SWAttributes &Attr = Attrs[SuccNum]; + RegSet &AVOUTs = Attr.AVOUT; + if (!AVOUTs.test(Reg)) { + VERBOSE_DEBUG(dbgs() << "Outgoing critical edge in " << MBBNum + << ".\n"); + AVOUTs.set(Reg); + Attr.ANTIN.set(Reg); + RegSet &Used = UsedCSR[SuccNum]; + if (Used.empty()) + Used.resize(TRI.getNumRegs()); + Used.set(Reg); + Change = true; + } + } + } + } + if (Change) + return false; + + // If the register uses are anticipated on *all* the paths leaving this + // block, and if the register is not available at the entrance of this + // block + // (if it is, then it means it has been saved already, but not + // restored), + // and if *none* of the predecessors anticipates this register on their + // output (we want to get the "highest" block), then we can identify a + // save + // point for the function. + // + // SAVE = ANTIN && !AVIN && !ANTIN(pred[i]) + // + bool NS = + none_of(blockPredecessors(MF, MBBNum), [&](const MachineBasicBlock *P) { + if (P == &MBB) // Ignore self. + return false; + return Attrs[blockNumber(P->getNumber())].ANTIN.test(Reg); + }); + if (Attr.ANTIN.test(Reg) && !Attr.AVIN.test(Reg) && NS) { + RegSet &Save = Saves[MBBNum]; + if (Save.empty()) + Save.resize(TRI.getNumRegs()); + Save.set(Reg); + } + + // If the register uses are available on *all* the paths leading to this + // block, and if the register is not anticipated at the exit of this + // block + // (if it is, then it means it has been restored already), and if *none* + // of + // the successors make the register available (we want to cover the + // deepest + // use), then we can identify a restrore point for the function. + // + // RESTORE = AVOUT && !ANTOUT && !AVOUT(succ[i]) + // + bool NR = + none_of(blockSuccessors(MF, MBBNum), [&](const MachineBasicBlock *S) { + if (S == &MBB) // Ignore self. + return false; + return Attrs[blockNumber(S->getNumber())].AVOUT.test(Reg); + }); + if (Attr.AVOUT.test(Reg) && !Attr.ANTOUT.test(Reg) && NR) { + RegSet &Restore = Restores[MBBNum]; + if (Restore.empty()) + Restore.resize(TRI.getNumRegs()); + Restore.set(Reg); + } + } + return true; +} +void ShrinkWrap2::dumpAttributes(unsigned Reg, const MachineFunction &MF, + AttributeMap &Attrs) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + for (const MachineBasicBlock &MBB : MF) { + unsigned MBBNum = MBB.getNumber(); + if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum)) + if (MBBNum != C->getNumber()) + continue; + SWAttributes &Attr = Attrs[MBBNum]; + dbgs() << "BB#" << MBBNum << "<" << PrintReg(Reg, &TRI) << ">" + << ":\n\tANTOUT : " << Attr.ANTOUT.test(Reg) << '\n' + << "\tANTIN : " << Attr.ANTIN.test(Reg) << '\n' + << "\tAVIN : " << Attr.AVIN.test(Reg) << '\n' + << "\tAVOUT : " << Attr.AVOUT.test(Reg) << '\n'; + } +} + +bool ShrinkWrap2::shouldShrinkWrap(const MachineFunction &MF) { + auto BecauseOf = [&](const MachineBasicBlock *MBB, const char *Title, + DebugLoc Loc = {}) { + MachineOptimizationRemarkMissed R("shrink-wrap", Title, Loc, MBB); + ORE->emit(R); + return false; + }; + + // FIXME: ShrinkWrap2: Make a list of what could go wrong with exceptions. + if (MF.empty() || MF.hasEHFunclets() || MF.callsUnwindInit() || + MF.callsEHReturn()) + return BecauseOf(&MF.front(), "SkippedExceptions"); + + for (const MachineBasicBlock &MBB : MF) { + // FIXME: ShrinkWrap2: Make a list of what could go wrong with exceptions. + if (MBB.isEHPad() || MBB.isEHFuncletEntry()) + return BecauseOf(&MBB, "SkippedExceptions", MBB.front().getDebugLoc()); + + // FIXME: ShrinkWrap2: This should we removed when we remove critical edge + // splitting. + // No indirect branches. This is required in order to be able to split + // critical edges. + for (const MachineInstr &MI : MBB) + if (MI.isIndirectBranch()) + return BecauseOf(&MBB, "SkippedIndirectBranches", MI.getDebugLoc()); + } + + return true; +} + +// FIXME: ShrinkWrap2: Target hook might add other callee saves. +void ShrinkWrap2::determineCalleeSaves(MachineFunction &MF) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Walk all the uses of each callee-saved register, and map them to their + // basic blocks. + const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); + + // FIXME: ShrinkWrap2: Naked functions. + // FIXME: ShrinkWrap2: __builtin_unwind_init. + + RegSet Regs(TRI.getNumRegs()); + + // Test all the target's callee saved registers. + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + + // Check for regmasks. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { + RegSet &Used = UsedCSR[blockNumber(MBB.getNumber())]; + if (Used.empty()) + Used.resize(TRI.getNumRegs()); + Used.set(Reg); + Regs.set(Reg); + } + } + } + } + + // If at least one of the aliases is used, mark the original register as + // used. + for (MCRegAliasIterator AliasReg(Reg, &TRI, true); AliasReg.isValid(); + ++AliasReg) { + // Walk all the uses, excepting for debug instructions. + for (auto MOI = MRI.reg_nodbg_begin(*AliasReg), e = MRI.reg_nodbg_end(); + MOI != e; ++MOI) { + // Get or create the registers used for the BB. + RegSet &Used = + UsedCSR[blockNumber(MOI->getParent()->getParent()->getNumber())]; + // Resize if it's the first time used. + if (Used.empty()) + Used.resize(TRI.getNumRegs()); + Used.set(Reg); + Regs.set(Reg); + } + // Look for any live-ins in basic blocks. + for (const MachineBasicBlock &MBB : MF) { + if (MBB.isLiveIn(*AliasReg)) { + RegSet &Used = UsedCSR[blockNumber(MBB.getNumber())]; + // Resize if it's the first time used. + if (Used.empty()) + Used.resize(TRI.getNumRegs()); + Used.set(Reg); + Regs.set(Reg); + } + } + } + } + + // Now, we're going to determine which registers are added / removed by the + // target itself. + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + std::unique_ptr RS( + TRI.requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + // FIXME: ShrinkWrap2: RegScavenger. + BitVector SavedRegsTarget; + TFI->determineCalleeSaves(MF, SavedRegsTarget, RS.get()); + TargetAddedRegs = Regs; + TargetAddedRegs.flip(); + TargetAddedRegs &= SavedRegsTarget; + TargetRemovedRegs = SavedRegsTarget; + TargetRemovedRegs.flip(); + TargetRemovedRegs &= Regs; + TargetRemovedRegs.flip(); +} + +void ShrinkWrap2::removeUsesOnNoReturnPaths(MachineFunction &MF) { + NoReturnBlocks.resize(MF.getNumBlockIDs()); + + // Mark all reachable blocks from any return blocks. + for (const MachineBasicBlock &MBB : MF) + if (MBB.isReturnBlock()) + for (const MachineBasicBlock *Block : inverse_depth_first(&MBB)) + NoReturnBlocks.set(Block->getNumber()); + + // Flip, so that we can get the non-reachable blocks. + NoReturnBlocks.flip(); + + for (int MBBNum : NoReturnBlocks.bits_set()) { + DEBUG(dbgs() << "Remove uses from no-return BB#" << MBBNum << '\n'); + UsedCSR.erase(MBBNum); + } +} + +void ShrinkWrap2::dumpUsedCSR(const MachineFunction &MF) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + BBRegSetMap Sorted(MF.getNumBlockIDs()); + for (auto &KV : UsedCSR) + Sorted[KV.first] = KV.second; + + for (unsigned MBBNum = 0; MBBNum < Sorted.size(); ++MBBNum) { + const RegSet &Regs = Sorted[MBBNum]; + if (Regs.empty()) + continue; + + dbgs() << "BB#" << MBBNum << " uses : "; + int Reg = Regs.find_first(); + if (Reg > 0) + dbgs() << PrintReg(Reg, &TRI); + for (Reg = Regs.find_next(Reg); Reg > 0; Reg = Regs.find_next(Reg)) + dbgs() << ", " << PrintReg(Reg, &TRI); + dbgs() << '\n'; + } +} + +void ShrinkWrap2::postProcessResults(MachineFunction &MF, + const SparseBBRegSetMap &UsedCSR) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + // If there is only one use of the register, and multiple saves / restores, + // remove them and place the save / restore at the used MBB. + for (int Reg : Regs.bits_set()) { + // FIXME: ShrinkWrap2: 2x std::find_if. + unsigned Uses = + count_if(UsedCSR, [&](const std::pair &CSR) { + return CSR.second.test(Reg); + }); + if (Uses == 1) { + // Gather all the saves. + BBSet SavesReg(MF.getNumBlockIDs()); + for (auto &KV : Saves) { + unsigned MBBNum = KV.first; + const RegSet &Regs = KV.second; + if (Regs.test(Reg)) + SavesReg.set(MBBNum); + } + + // Gather all the restores. + BBSet RestoresReg(MF.getNumBlockIDs()); + for (auto &KV : Restores) { + unsigned MBBNum = KV.first; + const RegSet &Regs = KV.second; + if (Regs.test(Reg)) + RestoresReg.set(MBBNum); + } + + if (SavesReg.count() == 1 && RestoresReg.count() == 1) + continue; + + for (int MBBNum : SavesReg.bits_set()) + Saves[MBBNum].reset(Reg); + for (int MBBNum : RestoresReg.bits_set()) + Restores[MBBNum].reset(Reg); + + auto It = find_if(UsedCSR, [&](const std::pair &CSR) { + return CSR.second.test(Reg); + }); + assert(It != UsedCSR.end() && "We are sure there is exactly one."); + + // Add it to the unique block that uses it. + unsigned MBBNum = It->first; + for (auto *Map : {&Saves, &Restores}) { + RegSet &Regs = (*Map)[MBBNum]; + if (Regs.empty()) + Regs.resize(TRI.getNumRegs()); + Regs.set(Reg); + } + } + } + + // Remove all the empty entries from the Saves / Restores maps. + // FIXME: ShrinkWrap2: Should we even have empty entries? + SmallVector ToRemove; + for (auto *Map : {&Saves, &Restores}) { + for (auto It = Map->begin(), End = Map->end(); It != End; ++It) + if (It->second.count() == 0) + ToRemove.push_back(It); + for (auto It : ToRemove) + Map->erase(It); + ToRemove.clear(); + } +} + +void ShrinkWrap2::returnToMachineFrame(MachineFunction &MF) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto Transform = [&](SparseBBRegSetMap &Src, + MachineFrameInfo::CalleeSavedMap &Dst) { + for (auto &KV : Src) { + MachineBasicBlock *MBB = MF.getBlockNumbered(KV.first); + RegSet &Regs = KV.second; + std::vector &CSI = Dst[MBB]; + + for (int Reg : Regs.bits_set()) + CSI.emplace_back(Reg); + } + }; + MFI.setShouldUseShrinkWrap2(true); + Transform(Saves, MFI.getSaves()); + Transform(Restores, MFI.getRestores()); +} + +void ShrinkWrap2::verifySavesRestores(MachineFunction &MF) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + auto HasReg = [&](const SparseBBRegSetMap &Map, unsigned Reg) { + return find_if(Map, [&](const std::pair &KV) { + return KV.second.test(Reg); + }) != Map.end(); + + }; + + auto RestoresReg = [&](unsigned Reg) { return HasReg(Restores, Reg); }; + auto SavesReg = [&](unsigned Reg) { return HasReg(Saves, Reg); }; + + // Check that all the CSRs used in the function are saved at least once. + for (int Reg : Regs.bits_set()) + assert(SavesReg(Reg) || RestoresReg(Reg) && "Used CSR is never saved!"); + + // Check that there are no saves / restores in a loop. + for (const SparseBBRegSetMap *Map : {&Saves, &Restores}) { + for (auto &KV : *Map) + assert(!SI.getSCCLoopFor(KV.first) && "Save / restore in a loop."); + } + + // Keep track of the currently saved registers. + RegSet Saved(TRI.getNumRegs()); + // Cache the state of each call, to avoid redundant checks. + std::vector> Cache(MF.getNumBlockIDs()); + + // Verify if: + // * All the saves are restored. + // * All the restores are related to a store. + // * There are no nested stores. + std::function verifySavesRestoresRec = [&]( + MachineBasicBlock *MBB) { + unsigned MBBNum = MBB->getNumber(); + // Don't even check no-return blocks. + if (MBB->succ_empty() && !MBB->isReturnBlock()) { + VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << " is an no-return\n"); + return; + } + + SmallVectorImpl &State = Cache[MBBNum]; + if (find(State, Saved) != State.end()) { + VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << " already visited.\n"); + return; + } + + State.push_back(Saved); + + VERBOSE_DEBUG( + dbgs() << "IN: BB#" << MBBNum << ": Save "; for (int Reg + : Saved.bits_set()) { + dbgs() << PrintReg(Reg, &TRI) << " "; + } dbgs() << '\n'); + + const RegSet &SavesMBB = Saves.lookup(MBBNum); + const RegSet &RestoresMBB = Restores.lookup(MBBNum); + + // Get the intersection of the currently saved registers and the + // registers to be saved for this basic block. If the intersection is + // not empty, it means we have nested saves for the same register. + RegSet Intersection(SavesMBB); + Intersection &= Saved; + + for (int Reg : Intersection.bits_set()) + DEBUG(dbgs() << PrintReg(Reg, &TRI) << " is saved twice.\n"); + + assert(Intersection.count() == 0 && "Nested saves for the same register."); + Intersection.reset(); + + // Save the registers to be saved. + for (int Reg : SavesMBB.bits_set()) { + Saved.set(Reg); + VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << ": Save " + << PrintReg(Reg, &TRI) << ".\n"); + } + + // If the intersection of the currently saved registers and the + // registers to be restored for this basic block is not equal to the + // restores, it means we are trying to restore something that is not + // saved. + Intersection = RestoresMBB; + Intersection &= Saved; + + assert(Intersection.count() == RestoresMBB.count() && + "Not all restores are saved."); + + // Restore the registers to be restored. + for (int Reg : RestoresMBB.bits_set()) { + Saved.reset(Reg); + VERBOSE_DEBUG(dbgs() << "IN: BB#" << MBBNum << ": Restore " + << PrintReg(Reg, &TRI) << ".\n"); + } + + if (MBB->succ_empty() && Saved.count() != 0) + llvm_unreachable("Not all saves are restored."); + + // Using the current set of saved registers, walk all the successors + // recursively. + for (MachineBasicBlock *Succ : MBB->successors()) + verifySavesRestoresRec(Succ); + + // Restore the state prior of the function exit. + for (int Reg : RestoresMBB.bits_set()) { + Saved.set(Reg); + VERBOSE_DEBUG(dbgs() << "OUT: BB#" << MBBNum << ": Save " + << PrintReg(Reg, &TRI) << ".\n"); + } + for (int Reg : SavesMBB.bits_set()) { + Saved.reset(Reg); + VERBOSE_DEBUG(dbgs() << "OUT: BB#" << MBBNum << ": Restore " + << PrintReg(Reg, &TRI) << ".\n"); + } + }; + + verifySavesRestoresRec(&MF.front()); +} + +void ShrinkWrap2::dumpResults(MachineFunction &MF) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + // for (MachineBasicBlock &MBB : MF) { + for (unsigned MBBNum = 0; MBBNum < MF.getNumBlockIDs(); ++MBBNum) { + if (Saves.count(MBBNum) || Restores.count(MBBNum)) { + DEBUG(dbgs() << "BB#" << MBBNum << ": Saves: "); + auto Save = Saves.lookup(MBBNum); + for (int Reg : Save.bits_set()) + DEBUG(dbgs() << PrintReg(Reg, &TRI) << ", "); + DEBUG(dbgs() << "| Restores: "); + auto Restore = Restores.lookup(MBBNum); + for (int Reg : Restore.bits_set()) + DEBUG(dbgs() << PrintReg(Reg, &TRI) << ", "); + + DEBUG(dbgs() << '\n'); + } + } +} + +bool ShrinkWrap2::splitCriticalEdges(MachineFunction &MF) { + auto IsCriticalEdge = [&](MachineBasicBlock *Src, MachineBasicBlock *Dst) { + return Src->succ_size() > 1 && Dst->pred_size() > 1; + }; + + bool Changed = true; + while (Changed) { + Changed = false; + SmallVector, 4> ToSplit; + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock *Succ : MBB.successors()) { + if (IsCriticalEdge(&MBB, Succ)) { + ToSplit.push_back({&MBB, Succ}); + VERBOSE_DEBUG(dbgs() << "Critical edge detected. Split.\n"); + } + } + } + + for (std::pair Split : ToSplit) + if (Split.first->SplitCriticalEdge(Split.second, *this)) + Changed = true; + } + return false; +} + +void ShrinkWrap2::markUsesOutsideLoops(MachineFunction &MF) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + // Keep track of the registers to attach to a basic block. + SparseBBRegSetMap ToInsert; + for (auto &KV : UsedCSR) { + unsigned MBBNum = KV.first; + const RegSet &Regs = KV.second; + + auto Mark = [&](const MachineBasicBlock *Block) { + unsigned BlockNum = Block->getNumber(); + if (ToInsert[BlockNum].empty()) + ToInsert[BlockNum].resize(TRI.getNumRegs()); + ToInsert[BlockNum] |= Regs; + VERBOSE_DEBUG(dbgs() << "Mark: BB#" << BlockNum << '\n'); + }; + + if (const SCCLoop *C = SI.getSCCLoopFor(MBBNum)) { + DEBUG(dbgs() << "Loop for CSR: BB#" << MBBNum << '\n'); + + // Mark all the entry blocks of the loop. + for (const MachineBasicBlock *Block : C->predecessors()) + Mark(Block); + + // Mark all the exit blocks of the loop. + for (const MachineBasicBlock *Exit : C->successors()) + Mark(Exit); + } + } + + for (auto &KV : ToInsert) + UsedCSR[blockNumber(KV.first)] |= KV.second; +} + +bool ShrinkWrap2::runOnMachineFunction(MachineFunction &MF) { + // Initialize the RemarkEmitter here, because we want to emit remarks on why + // we skipped the function. + ORE = &getAnalysis().getORE(); + if (skipFunction(*MF.getFunction()) || MF.empty() || + EnableShrinkWrap2Opt == cl::BOU_FALSE || !shouldShrinkWrap(MF)) + return false; + + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + if (ViewCFGDebug == cl::BOU_TRUE) + MF.viewCFGOnly(); + + // FIXME: ShrinkWrap2: Sadly, we have to split critical edges before looking + // for all the used CSRs, since liveness analysis is impacted. This means that + // even for programs with no CSRs used, we have to split all the critical + // edges. + // splitCriticalEdges(MF); + + // Initialize the dependencies. + init(MF); + + VERBOSE_DEBUG(for (auto &SCC + : SI.SCCs) { + dbgs() << "SCCLoop: " << SCC.getNumber() << "\n Pred: "; + for (auto *Pred : SCC.Predecessors) + dbgs() << Pred->getNumber() << ", "; + dbgs() << "\n Succ: "; + for (auto *Succ : SCC.Successors) + dbgs() << Succ->getNumber() << ", "; + dbgs() << '\n'; + }); + + // Determine all the used callee saved registers. If there are none, avoid + // running this pass. + determineCalleeSaves(MF); + DEBUG(dumpUsedCSR(MF)); + + if (UsedCSR.empty()) { + clear(); + return false; + } + + // Don't bother saving if we know we're never going to return. + removeUsesOnNoReturnPaths(MF); + DEBUG(dbgs() << "**** After removing uses on no-return paths\n";); + DEBUG(dumpUsedCSR(MF)); + + // Collect all the CSRs. We do this now because we want to avoid iterating + // on registers that have been removed because of no-return blocks. + // FIXME: ShrinkWrap2: Is this really necessary? + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + Regs.resize(TRI.getNumRegs()); + for (auto &KV : UsedCSR) + Regs |= KV.second; + + markUsesOutsideLoops(MF); + DEBUG(dbgs() << "**** After marking uses inside loops\n";); + DEBUG(dumpUsedCSR(MF)); + + // FIXME: ShrinkWrap2: Find a better way to avoid treating added CSRs the same + // as original ones. This is needed for postProcessResults. + // FIXME: Probably just save / restore once per block if there is only one + // register from the beginning. + auto OldUsedCSR = UsedCSR; + + // Use this scope to get rid of the dataflow attributes after the + // computation. + { + // Compute the dataflow attributes described by Fred C. Chow. + AttributeMap Attrs; + populateAttributes(MF, Attrs); + // For each register, compute the data flow attributes. + for (int Reg : Regs.bits_set()) { + // FIXME: ShrinkWrap2: Avoid recomputing all the saves / restores. + do { + for (auto *Map : {&Saves, &Restores}) { + for (auto &KV : *Map) { + RegSet &Regs = KV.second; + Regs.reset(Reg); + } + } + // Compute the attributes. + computeAttributes(Reg, MF, Attrs); + // Gather the results. + } while (!gatherAttributesResults(Reg, MF, Attrs)); + VERBOSE_DEBUG(dumpResults(MF)); + } + } + + DEBUG(dbgs() << "**** Analysis results\n";); + DEBUG(dumpResults(MF)); + + postProcessResults(MF, OldUsedCSR); + + DEBUG(dbgs() << "**** Shrink-wrapping results\n"); + DEBUG(dumpResults(MF)); + + // FIXME: ShrinkWrap2: Enable EXPENSIVE_CHECKS. + //#ifdef EXPENSIVE_CHECKS + verifySavesRestores(MF); + //#endif // EXPENSIVE_CHECKS + + // FIXME: ShrinkWrap2: Should merge the behaviour in PEI? + // If there is only one save block, and it's the first one, don't forward + // anything to the MachineFrameInfo. + // We also could have no saves, since on no-return paths, we can remove + // saves. + if (Saves.size() == 1 && + Saves.begin()->first == static_cast(MF.front().getNumber()) && + Saves.begin()->second == Regs) + DEBUG(dbgs() << "No shrink-wrapping results.\n"); + else + returnToMachineFrame(MF); + clear(); + + return false; +} Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -621,8 +621,10 @@ addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getOptLevel() != CodeGenOpt::None) - addPass(&ShrinkWrapID); + // if (getOptLevel() != CodeGenOpt::None) + //addPass(&ShrinkWrapID); + // FIXME: ShrinkWrap2: Merge. It's enabled by default for test / debug purposes. + addPass(&ShrinkWrap2ID); // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only // do so if it hasn't been disabled, substituted, or overridden. Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -1068,7 +1068,12 @@ if (X86FI->getRestoreBasePointer()) FrameSize += SlotSize; - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + NumBytes = FrameSize; + // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now + // have to include the callee saves in our frame size, so that our sp + // displacement can be updated properly. + if (!MFI.getShouldUseShrinkWrap2()) + NumBytes -= X86FI->getCalleeSavedFrameSize(); // Callee-saved registers are pushed on stack before the stack is realigned. if (TRI->needsStackRealignment(MF) && !IsWin64Prologue) @@ -1133,7 +1138,12 @@ } } else { assert(!IsFunclet && "funclets without FPs not yet implemented"); - NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + NumBytes = StackSize; + // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now + // have to include the callee saves in our frame size, so that our sp + // displacement can be updated properly. + if (!MFI.getShouldUseShrinkWrap2()) + NumBytes -= X86FI->getCalleeSavedFrameSize(); } // For EH funclets, only allocate enough space for outgoing calls. Save the @@ -1146,6 +1156,10 @@ bool PushedRegs = false; int StackOffset = 2 * stackGrowth; + // FIXME: Add CFI for all the callee saved registers. Since the saves / + // restores are not at the beginning of the function, we need to go through + // all the basic blocks. + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup) && (MBBI->getOpcode() == X86::PUSH32r || @@ -1577,7 +1591,12 @@ } else if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; - NumBytes = FrameSize - CSSize; + NumBytes = FrameSize; + // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now + // have to include the callee saves in our frame size, so that our sp + // displacement can be updated properly. + if (!MFI.getShouldUseShrinkWrap2()) + NumBytes -= CSSize; // Callee-saved registers were pushed on stack before the stack was // realigned. @@ -1589,7 +1608,13 @@ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) .setMIFlag(MachineInstr::FrameDestroy); } else { - NumBytes = StackSize - CSSize; + NumBytes = StackSize; + // FIXME: ShrinkWrap2: Since we disabled the push / pop spilling, we now + // have to include the callee saves in our frame size, so that our sp + // displacement can be updated properly. + if (!MFI.getShouldUseShrinkWrap2()) + NumBytes -= CSSize; + } uint64_t SEHStackAllocAmt = NumBytes; @@ -1650,6 +1675,12 @@ unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); uint64_t LEAAmount = IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; + // FIXME: ShrinkWrap2: Here, we can't assume we are going to pop all the + // callee saves (because we aren't, we actually move them back, then adjust + // the stack), so we just want to restore the stack pointer. This should go + // away at some point... + if (MFI.getShouldUseShrinkWrap2()) + LEAAmount = 0; // There are only two legal forms of epilogue: // - add SEHAllocationSize, %rsp Index: test/CodeGen/X86/ShrinkWrapping/2006-04-27-ISelFoldingBug-Reduced.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/2006-04-27-ISelFoldingBug-Reduced.mir @@ -0,0 +1,45 @@ +# RUN: llc -march=x86 -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll +--- | + define void @loadAndRLEsource_no_exit_2E_1_label_2E_0() nounwind { + entry: + ret void + } +... +--- +name: loadAndRLEsource_no_exit_2E_1_label_2E_0 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.1: + RET 0 + + bb.2: + RET 0 + + bb.3: + successors: %bb.4, %bb.2 + + %esi = MOV32ri 32 + + NOOP implicit-def %eflags + JGE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors:%bb.1, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.1, implicit killed %eflags + JMP_1 %bb.2 +... +#CHECK-LABEL: loadAndRLEsource_no_exit_2E_1_label_2E_0 + +#CHECK: BB#3 uses : %ESI +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#3: Saves: %ESI, | Restores: %ESI, Index: test/CodeGen/X86/ShrinkWrapping/2007-08-09-IllegalX86-64Asm-Reduced.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/2007-08-09-IllegalX86-64Asm-Reduced.mir @@ -0,0 +1,39 @@ +# RUN: llc -march=x86 -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +--- | + define void @ubyte_divmod() nounwind { + entry: + ret void + } +... +--- +name: ubyte_divmod +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.4, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.2 + + bb.2: + successors: %bb.3, %bb.4 + + %ebx = MOV32ri 30 + NOOP implicit-def %eflags + JNE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.3 + + bb.3: + RET 0 + + bb.4: + RET 0 +... +#CHECK-LABEL: ubyte_divmod + +#CHECK: BB#1 uses : %EBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#1: Saves: %EBX, | Restores: %EBX, Index: test/CodeGen/X86/ShrinkWrapping/2007-11-06-InstrSched.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/2007-11-06-InstrSched.mir @@ -0,0 +1,43 @@ +# RUN: llc -march=x86 -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/2007-11-06-InstrSched.ll. +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.3 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.4 + + JMP_1 %bb.4 + + bb.3: + successors: %bb.3, %bb.4 + + %esi = MOV32ri 30 + NOOP implicit-def %eflags + JB_1 %bb.3, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + + RET 0 +... +#CHECK-LABEL: foo + +#CHECK: BB#2 uses : %ESI +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %ESI, | Restores: +#CHECK-NEXT: BB#3: Saves: | Restores: %ESI, Index: test/CodeGen/X86/ShrinkWrapping/2008-09-29-ReMatBug.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/2008-09-29-ReMatBug.mir @@ -0,0 +1,116 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/2008-09-29-ReMatBug.ll. +--- | + define void @stringRepresentation() nounwind { + entry: + ret void + } +... +--- +name: stringRepresentation +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.15 + + JMP_1 %bb.15 + + bb.2: + successors: %bb.11 + + %r15 = MOV64ri 33 + %r14 = MOV64ri 33 + %rbx = MOV64ri 33 + JMP_1 %bb.11 + + bb.3: + successors: %bb.4, %bb.3 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors: %bb.6 + liveins: %r14 + + %rdi = COPY %r14 + JMP_1 %bb.6 + + bb.5: + successors: %bb.6 + + JMP_1 %bb.6 + + bb.6: + successors: %bb.7 + + JMP_1 %bb.7 + + + bb.7: + successors: %bb.8, %bb.9 + + NOOP implicit-def %eflags + JA_1 %bb.8, implicit killed %eflags + JMP_1 %bb.9 + + bb.8: + successors: %bb.5, %bb.7 + + NOOP implicit-def %eflags + JE_1 %bb.5, implicit killed %eflags + JMP_1 %bb.7 + + bb.9: + successors: %bb.10, %bb.7 + liveins: %rbx + + NOOP implicit-def %eflags + JE_1 %bb.7, implicit killed %eflags + JMP_1 %bb.10 + + bb.10: + successors: %bb.11 + + + bb.11: + successors: %bb.12, %bb.3 + + NOOP implicit-def %eflags + JE_1 %bb.12, implicit killed %eflags + JMP_1 %bb.3 + + bb.12: + successors: %bb.13, %bb.14 + + NOOP implicit-def %eflags + JE_1 %bb.14, implicit killed %eflags + + bb.13: + successors: %bb.15 + + JMP_1 %bb.15 + + bb.14: + RET 0 + + bb.15: + RET 0 +... +#CHECK-LABEL: stringRepresentation + +#CHECK: BB#2 uses : %RBX, %R14, %R15 +#CHECK-NEXT: BB#3 uses : %RBX, %R14 +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#2: Saves: %RBX, %R14, %R15, | Restores: %R15 +#CHECK-NEXT: BB#13: Saves: | Restores: %RBX, %R14 +#CHECK-NEXT: BB#14: Saves: | Restores: %RBX, %R14 Index: test/CodeGen/X86/ShrinkWrapping/2009-04-27-CoalescerAsser-CriticalEdges-Reduced.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/2009-04-27-CoalescerAsser-CriticalEdges-Reduced.mir @@ -0,0 +1,55 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/2009-04-27-CoalescerAssert.ll +--- | + define void @getAffNeighbour() nounwind { + entry: + ret void + } +... +--- +name: getAffNeighbour +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3, %bb.1 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.4 + + JMP_1 %bb.4 + + bb.2: + + bb.3: + successors: %bb.4, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors: %bb.6, %bb.5 + + %rbx = MOV64ri32 30 + + NOOP implicit-def %eflags + JE_1 %bb.6, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + RET 0 + + bb.6: + RET 0 + +... +#CHECK-LABEL: getAffNeighbour + +#CHECK: BB#4 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#4: Saves: %RBX, | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/2009-09-10-SpillComments.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/2009-09-10-SpillComments.mir @@ -0,0 +1,70 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/2009-09-10-SpillComments.ll +# FIXME: ShrinkWrap2: this crashes with regions enabled. +--- | + define void @walk_fixup_memory_subreg() nounwind { + entry: + ret void + } +... +--- +name: walk_fixup_memory_subreg +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.6, %bb.1 + + %rbx = MOV64ri 30 + NOOP implicit-def %eflags + JNE_1 %bb.6, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.2, %bb.3 + liveins: %rbx + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.2 + + bb.2: + RET 0 + + bb.3: + successors: %bb.4 + liveins: %rbx + + %ebp = MOV32r0 implicit-def dead %eflags + + bb.4: + successors: %bb.5, %bb.4 + liveins: %ebp, %rbx + + NOOP implicit-def %eflags + JNE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + successors: %bb.4 + liveins: %ebp, %rbx + + %rbx = ADD64rr %rbx, %rbx, implicit-def %eflags + JMP_1 %bb.4 + + bb.6: + RET 0 +... +#CHECK-LABEL: walk_fixup_memory_subreg + +#CHECK: BB#0 uses : %RBX +#CHECK-NEXT: BB#1 uses : %RBX +#CHECK-NEXT: BB#3 uses : %RBP, %RBX +#CHECK-NEXT: BB#4 uses : %RBP, %RBX +#CHECK-NEXT: Remove uses from no-return BB#3 +#CHECK-NEXT: Remove uses from no-return BB#4 +#CHECK-NEXT: Remove uses from no-return BB#5 +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#2: Saves: | Restores: %RBX, +#CHECK-NEXT: BB#6: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/BasicBranch.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/BasicBranch.mir @@ -0,0 +1,39 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRE: asserts +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2, %bb.1 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3 + + %rbx = COPY %rsp + JMP_1 %bb.3 + + bb.2: + RET 0 + + bb.3: + %rbx = MOV64ri 30 + RET 0 +... +#CHECK-LABEL: fun + +#CHECK: BB#1 uses : %RBX +#CHECK-NEXT: BB#3 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#1: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#3: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/InfiniteLoop.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/InfiniteLoop.mir @@ -0,0 +1,39 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# Check that we don't save on a branch that never returns. +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %edi + + NOOP implicit-def %eflags + JNE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + RET 0 + + bb.2: + successors: %bb.3 + + %rbx = MOV64ri 30 + + bb.3: + successors: %bb.3 + + JMP_1 %bb.3 +... +#CHECK-LABEL: foo + +#CHECK: BB#2 uses : %RBX +#CHECK-NEXT: Remove uses from no-return BB#2 +#CHECK-NOT: Saves: +#CHECK-NOT: restores: Index: test/CodeGen/X86/ShrinkWrapping/IrreducibleCFG.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/IrreducibleCFG.mir @@ -0,0 +1,84 @@ +# RUN: llc -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @getAffNeighbour() #0 { + entry: + ret void + } + + attributes #0 = { nounwind } + +... +--- +name: getAffNeighbour +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.10, %bb.6 + + NOOP implicit-def %eflags + JNE_1 %bb.10, implicit killed %eflags + JMP_1 %bb.6 + + bb.1: + successors: %bb.6 + + JMP_1 %bb.6 + + bb.2: + successors: %bb.10 + + JMP_1 %bb.10 + + bb.3: + successors: %bb.4 + + %ebx = MOV32ri 30 + JMP_1 %bb.4 + + bb.4: + successors: %bb.5, %bb.9 + + NOOP implicit-def %eflags + JNE_1 %bb.9, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + RET 0 + + bb.6: + successors: %bb.2, %bb.7 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.7 + + bb.7: + successors: %bb.4 + + JMP_1 %bb.4 + + bb.8: + successors: %bb.3, %bb.1 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.1 + + bb.9: + successors: %bb.4, %bb.8 + + NOOP implicit-def %eflags + JNE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.8 + + bb.10: + successors: %bb.7 + + JMP_1 %bb.7 + +... +#CHECK: BB#1 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#5: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/LoopBasic.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/LoopBasic.mir @@ -0,0 +1,62 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @freqSaveAndRestoreOutsideLoop2() nounwind { + entry: + ret void + } +... +--- +name: freqSaveAndRestoreOutsideLoop2 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2, %bb.1 + + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.2: + RET 0 + + bb.3: + successors: %bb.4, %bb.5 + + NOOP implicit-def %eflags + JNE_1 %bb.5, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors: %bb.6 + + JMP_1 %bb.6 + + bb.5: + successors: %bb.6 + + %ebx = MOV32ri 12 + JMP_1 %bb.6 + + bb.6: + successors: %bb.7, %bb.3 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.7 + + bb.7: + RET 0 +... +#CHECK-LABEL: freqSaveAndRestoreOutsideLoop2 + +#CHECK: BB#3 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#1: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#7: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/LoopNoPreheader.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/LoopNoPreheader.mir @@ -0,0 +1,54 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRE: asserts +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2, %bb.1 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.2: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.3: + successors: %bb.4 + + %rbx = MOV64ri 30 + JMP_1 %bb.4 + + bb.4: + successors: %bb.3, %bb.5 + + NOOP implicit-def %eflags + JE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.5 + RET 0 + + bb.5: + + RET 0 + +... +#CHECK-LABEL: fun + +#CHECK: BB#3 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#5: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/LoopNoPreheaderLatchExit.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/LoopNoPreheaderLatchExit.mir @@ -0,0 +1,50 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRE: asserts +# XFAIL: * +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2, %bb.1 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.2: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.3: + successors: %bb.4 + + %rbx = MOV64ri 30 + JMP_1 %bb.4 + + bb.4: + successors: %bb.3 + + NOOP implicit-def %eflags + JE_1 %bb.3, implicit killed %eflags + RET 0 + +... +#CHECK-LABEL: fun + +#CHECK: BB#3 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#3: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/MultipleCriticalEdges.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/MultipleCriticalEdges.mir @@ -0,0 +1,50 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRE: asserts +--- | + define void @foo() nounwind { + entry: + ret void + } + +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.3 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.4, %bb.2 + + %ebx = MOV32r0 implicit-def dead %eflags + NOOP implicit-def %eflags + JNE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.4 + + bb.2: + successors: %bb.4, %bb.3 + + %ebx = MOV32r0 implicit-def dead %eflags + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + RET 0 + + bb.3: + RET 0 + +... +#CHECK-LABEL: foo +#CHECK: BB#1 uses : %RBX +#CHECK-NEXT: BB#2 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#3: Saves: | Restores: %RBX, +#CHECK-NEXT: BB#4: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/NestedLoopsCriticalEdges.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/NestedLoopsCriticalEdges.mir @@ -0,0 +1,56 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.6 + + NOOP implicit-def %eflags + JE_1 %bb.1, implicit killed %eflags + JMP_1 %bb.6 + + bb.1: + successors: %bb.2, %bb.6 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.6 + + bb.2: + successors: %bb.3 + %rbx = MOV64ri 12 + + bb.3: + successors: %bb.4 + + bb.4: + successors: %bb.4, %bb.5 + + NOOP implicit-def %eflags + JE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + successors: %bb.6, %bb.3 + + NOOP implicit-def %eflags + JE_1 %bb.6, implicit killed %eflags + JMP_1 %bb.3 + + bb.6: + RET 0 + +... +#CHECK-LABEL: foo + +#CHECK: BB#2 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#2: Saves: %RBX, | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/Paper1Figure2CriticalEdge.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/Paper1Figure2CriticalEdge.mir @@ -0,0 +1,47 @@ +# RUN: llc -mtriple=i386-apple-darwin10 -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3, %bb.4 + NOOP implicit-def %eflags + JE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.3 + + bb.2: + successors: %bb.4 + + %ebx = MOV32ri 89 + JMP_1 %bb.4 + + bb.3: + RET 0 + + bb.4: + + %ebx = MOV32ri 89 + RET 0 +... +#CHECK-LABEL: foo + +#CHECK: BB#2 uses : %EBX +#CHECK-NEXT: BB#4 uses : %EBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %EBX, | Restores: +#CHECK-NEXT: BB#3: Saves: | Restores: %EBX, +#CHECK-NEXT: BB#4: Saves: | Restores: %EBX, Index: test/CodeGen/X86/ShrinkWrapping/Paper2Figure1.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/Paper2Figure1.mir @@ -0,0 +1,57 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3 + JMP_1 %bb.3 + + bb.2: + successors: %bb.3 + + %ebx = MOV32ri 89 + JMP_1 %bb.3 + + bb.3: + successors: %bb.5, %bb.4 + + NOOP implicit-def %eflags + JE_1 %bb.5, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors: %bb.6 + + %ebx = MOV32ri 89 + JMP_1 %bb.6 + + bb.5: + successors: %bb.6 + + JMP_1 %bb.6 + + bb.6: + RET 0 +... +#CHECK-LABEL: foo + +#CHECK: BB#2 uses : %RBX +#CHECK-NEXT: BB#4 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#2: Saves: %RBX, | Restores: %RBX, +#CHECK-NEXT: BB#4: Saves: %RBX, | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/Paper2Figure2.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/Paper2Figure2.mir @@ -0,0 +1,121 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.8 + + NOOP implicit-def %eflags + JNE_1 %bb.8, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.2, %bb.7 + + NOOP implicit-def %eflags + JNE_1 %bb.7, implicit killed %eflags + JMP_1 %bb.2 + + bb.2: + successors: %bb.3, %bb.5 + + NOOP implicit-def %eflags + JNE_1 %bb.5, implicit killed %eflags + JMP_1 %bb.3 + + bb.3: + successors: %bb.4, %bb.5 + + %ebx = MOV32ri 9 + NOOP implicit-def %eflags + JNE_1 %bb.5, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors: %bb.5 + + %ebx = MOV32ri 9 + JMP_1 %bb.5 + + bb.5: + successors: %bb.6, %bb.7 + + NOOP implicit-def %eflags + JNE_1 %bb.7, implicit killed %eflags + JMP_1 %bb.6 + + bb.6: + successors: %bb.7 + + %ebx = MOV32ri 9 + JMP_1 %bb.7 + + bb.7: + successors: %bb.15 + + JMP_1 %bb.15 + + bb.8: + successors: %bb.9, %bb.10 + + NOOP implicit-def %eflags + JNE_1 %bb.10, implicit killed %eflags + JMP_1 %bb.9 + + bb.9: + successors: %bb.11 + + JMP_1 %bb.11 + + bb.10: + successors: %bb.11 + + %ebx = MOV32ri 9 + JMP_1 %bb.11 + + bb.11: + successors: %bb.12, %bb.13 + + NOOP implicit-def %eflags + JNE_1 %bb.13, implicit killed %eflags + JMP_1 %bb.12 + + bb.12: + successors: %bb.14 + + JMP_1 %bb.14 + + bb.13: + successors: %bb.14 + + %ebx = MOV32ri 9 + JMP_1 %bb.14 + + bb.14: + successors: %bb.15 + JMP_1 %bb.15 + + + bb.15: + RET 0 +... +#CHECK-LABEL: fun + +#CHECK: BB#3 uses : %RBX +#CHECK-NEXT: BB#4 uses : %RBX +#CHECK-NEXT: BB#6 uses : %RBX +#CHECK-NEXT: BB#10 uses : %RBX +#CHECK-NEXT: BB#13 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#1: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#7: Saves: | Restores: %RBX, +#CHECK-NEXT: BB#10: Saves: %RBX, | Restores: %RBX, +#CHECK-NEXT: BB#13: Saves: %RBX, | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/SCCCriticalEdge.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/SCCCriticalEdge.mir @@ -0,0 +1,47 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRE: asserts +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.5 + + NOOP implicit-def %eflags + JE_1 %bb.1, implicit killed %eflags + JMP_1 %bb.5 + + bb.1: + successors: %bb.2 + + %rbx = MOV64ri 30 + JMP_1 %bb.2 + + bb.2: + successors: %bb.3 + + JMP_1 %bb.3 + + bb.3: + successors: %bb.3, %bb.5 + + NOOP implicit-def %eflags + JE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + RET 0 + +... +#CHECK-LABEL: fun + +#CHECK: BB#1 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#1: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#2: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/SaveBeforeLoop.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/SaveBeforeLoop.mir @@ -0,0 +1,58 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRE: asserts +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.4 + + NOOP implicit-def %eflags + JE_1 %bb.1, implicit killed %eflags + JMP_1 %bb.4 + + bb.1: + successors: %bb.2, %bb.5 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.5 + + bb.2: + successors: %bb.3 + + %rbx = MOV64ri 30 + JMP_1 %bb.3 + + bb.3: + successors: %bb.3, %bb.5 + + NOOP implicit-def %eflags + JE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + successors: %bb.5, %bb.6 + + NOOP implicit-def %eflags + JE_1 %bb.5, implicit killed %eflags + JMP_1 %bb.6 + + bb.4: + RET 0 + + bb.6: + RET 0 + +... +#CHECK-LABEL: fun + +#CHECK: BB#2 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#2: Saves: %RBX, | Restores: %RBX Index: test/CodeGen/X86/ShrinkWrapping/SimpleLoopBranch.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/SimpleLoopBranch.mir @@ -0,0 +1,41 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @self_promoting_args_p() nounwind { + entry: + ret void + } +... +--- +name: self_promoting_args_p +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3, %bb.2 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.2 + + bb.1: + successors: %bb.3, %bb.2 + + %rbx = MOV64ri 40 + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.2 + + bb.2: + successors: %bb.1 + + JMP_1 %bb.1 + + bb.3: + RET 0 +... +#CHECK-LABEL: self_promoting_args_p + +#CHECK: BB#1 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#3: Saves: | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/StackAlignment.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/StackAlignment.mir @@ -0,0 +1,40 @@ +# RUN: llc -disable-fp-elim -mtriple=x86_64-- -start-before=shrink-wrap2 -stop-after=prologepilog %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o - | FileCheck %s +# REQUIRE: asserts +# XFAIL: x86 +--- | + define void @fun() nounwind { + entry: + ret void + } +... +--- +name: fun +tracksRegLiveness: true +stack: + - { id: 0, offset: 0, size: 8, alignment: 8 } +body: | + bb.0: + successors: %bb.2, %bb.1 + + NOOP implicit-def %eflags + JE_1 %bb.2, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.3 + + %rbx = COPY %rsp + %r14 = COPY %rsp + JMP_1 %bb.3 + + bb.2: + RET 0 + + bb.3: + liveins: %rbx + + %rax = MOV64rm %stack.0, %rbx, _, 0, _ + RET 0, %rax +... +#CHECK-LABEL: fun +#CHECK: %rsp = frame-setup SUB64ri8 %rsp, 16 Index: test/CodeGen/X86/ShrinkWrapping/Tree.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/Tree.mir @@ -0,0 +1,55 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +body: | + bb.0: + successors: %bb.1, %bb.4 + + JNE_1 %bb.4, implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + successors: %bb.2, %bb.3 + + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.2 + + bb.2: + %ebx = MOV32ri 12 + RET 0, %eax + + bb.3: + %ebx = MOV32ri 12 + RET 0, %eax + + bb.4: + successors: %bb.5, %bb.6 + + CMP32ri8 killed %edi, 90, implicit-def %eflags + JNE_1 %bb.6, implicit killed %eflags + JMP_1 %bb.5 + + bb.5: + %ebx = MOV32ri 12 + RET 0, %eax + + bb.6: + RET 0, %eax +... +#CHECK-LABEL: foo + +#CHECK: BB#2 uses : %RBX +#CHECK-NEXT: BB#3 uses : %RBX +#CHECK-NEXT: BB#5 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#1: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#2: Saves: | Restores: %RBX, +#CHECK-NEXT: BB#3: Saves: | Restores: %RBX, +#CHECK-NEXT: BB#5: Saves: %RBX, | Restores: %RBX, Index: test/CodeGen/X86/ShrinkWrapping/lit.local.cfg =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True Index: test/CodeGen/X86/ShrinkWrapping/optimize-max-0.mir =================================================================== --- /dev/null +++ test/CodeGen/X86/ShrinkWrapping/optimize-max-0.mir @@ -0,0 +1,49 @@ +# RUN: llc -mtriple=x86_64-- -run-pass=shrink-wrap2 %s -enable-shrink-wrap2=true -debug-only=shrink-wrap2 -o /dev/null 2>&1 | FileCheck %s +# REQUIRES: asserts +# This is a reduced test case from test/CodeGen/X86/optimize-max-0.ll +--- | + define void @foo() nounwind { + entry: + ret void + } +... +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.6, %bb.3 + + NOOP implicit-def %eflags + JE_1 %bb.6, implicit killed %eflags + JMP_1 %bb.3 + + bb.3: + successors: %bb.3, %bb.4 + + NOOP implicit-def %eflags + JNE_1 %bb.3, implicit killed %eflags + JMP_1 %bb.4 + + bb.4: + successors: %bb.6 + + JMP_1 %bb.6 + + bb.6: + successors: %bb.8 + + %ebx = MOV32ri 30 + JMP_1 %bb.8 + + bb.8: + + RET 0 + +... +#CHECK-LABEL: foo + +#CHECK: BB#3 uses : %RBX +#CHECK: **** Shrink-wrapping results +#CHECK-NEXT: BB#0: Saves: %RBX, | Restores: +#CHECK-NEXT: BB#4: Saves: | Restores: %RBX,