Index: include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- include/llvm/CodeGen/MachineFrameInfo.h +++ include/llvm/CodeGen/MachineFrameInfo.h @@ -246,6 +246,16 @@ /// True if this is a varargs function that contains a musttail call. bool HasMustTailInVarArgFunc; + /// Not null, if shrink-wrapping found a better place for the prologue. + MachineBasicBlock *Save; + /// Not null, if shrink-wrapping found a better place for the epilogue. + MachineBasicBlock *Restore; + + /// Check if it exists a path from \p MBB leading to the basic + /// block with a SavePoint (a.k.a. prologue). + bool isBeforeSavePoint(const MachineFunction &MF, + const MachineBasicBlock &MBB) const; + public: explicit MachineFrameInfo(unsigned StackAlign, bool isStackRealign, bool RealignOpt) @@ -269,6 +279,8 @@ HasInlineAsmWithSPAdjust = false; HasVAStart = false; HasMustTailInVarArgFunc = false; + Save = nullptr; + Restore = nullptr; } /// hasStackObjects - Return true if there are any stack objects in this @@ -597,6 +609,11 @@ void setCalleeSavedInfoValid(bool v) { CSIValid = v; } + MachineBasicBlock *getSavePoint() const { return Save; } + void setSavePoint(MachineBasicBlock *NewSave) { Save = NewSave; } + MachineBasicBlock *getRestorePoint() const { return Restore; } + void setRestorePoint(MachineBasicBlock *NewRestore) { Restore = NewRestore; } + /// getPristineRegs - Return a set of physical registers that are pristine on /// entry to the MBB. /// Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -120,6 +120,9 @@ /// Default setting for -enable-tail-merge on this target. bool EnableTailMerge; + /// Default setting for -enable-shrink-wrap on this target. + bool EnableShrinkWrap; + public: TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); // Dummy constructor. 
@@ -179,6 +182,9 @@ /// Return true if the optimized regalloc pipeline is enabled. bool getOptimizeRegAlloc() const; + /// Return true if shrink wrapping is enabled. + bool getEnableShrinkWrap() const; + /// Return true if the default global register allocator is in use and /// has not be overriden on the command line with '-regalloc=...' bool usingDefaultRegAlloc() const; @@ -426,6 +432,10 @@ /// basic blocks. extern char &SpillPlacementID; + /// ShrinkWrap pass. Look for the best place to insert save and restore + /// instructions and update the MachineFrameInfo with that information. + extern char &ShrinkWrapID; + /// VirtRegRewriter pass. Rewrite virtual registers to physical registers as /// assigned in VirtRegMap. extern char &VirtRegRewriterID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -247,6 +247,7 @@ void initializeSROA_SSAUpPass(PassRegistry&); void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&); void initializeScalarEvolutionPass(PassRegistry&); +void initializeShrinkWrapPass(PassRegistry &); void initializeSimpleInlinerPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeRegisterCoalescerPass(PassRegistry&); Index: include/llvm/Target/TargetFrameLowering.h =================================================================== --- include/llvm/Target/TargetFrameLowering.h +++ include/llvm/Target/TargetFrameLowering.h @@ -130,21 +130,26 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - virtual void emitPrologue(MachineFunction &MF) const = 0; + virtual void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const = 0; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; /// Adjust the prologue to have the function use segmented stacks. 
This works /// by adding a check even before the "normal" function prologue. - virtual void adjustForSegmentedStacks(MachineFunction &MF) const { } + virtual void adjustForSegmentedStacks(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} /// Adjust the prologue to add Erlang Run-Time System (ERTS) specific code in /// the assembly prologue to explicitly handle the stack. - virtual void adjustForHiPEPrologue(MachineFunction &MF) const { } + virtual void adjustForHiPEPrologue(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} /// Adjust the prologue to add an allocation at a fixed offset from the frame /// pointer. - virtual void adjustForFrameAllocatePrologue(MachineFunction &MF) const { } + virtual void + adjustForFrameAllocatePrologue(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee /// saved registers and returns true if it isn't possible / profitable to do Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -93,6 +93,7 @@ ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp + ShrinkWrap.cpp ShadowStackGC.cpp ShadowStackGCLowering.cpp SjLjEHPrepare.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -61,6 +61,7 @@ initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRegisterCoalescerPass(Registry); + initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); Index: lib/CodeGen/MachineFunction.cpp =================================================================== --- lib/CodeGen/MachineFunction.cpp +++ lib/CodeGen/MachineFunction.cpp @@ -600,8 +600,8 @@ for (const 
MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); - // The entry MBB always has all CSRs pristine. - if (MBB == &MF->front()) + // Each MBB before the save point has all CSRs pristine. + if (isBeforeSavePoint(*MF, *MBB)) return BV; // On other MBBs the saved CSRs are not pristine. @@ -613,6 +613,40 @@ return BV; } +// Note: We could use some sort of caching mechanism, but we lack the ability +// to know when the cache is invalid, i.e., the CFG changed. +// Assuming we have that, we can simply compute the whole set of MBBs +// that are before the save point. +bool MachineFrameInfo::isBeforeSavePoint(const MachineFunction &MF, + const MachineBasicBlock &MBB) const { + // Early exit if shrink-wrapping did not kick in. + if (!Save) + return &MBB == &MF.front(); + + // Starting from MBB, check if there is a path leading to Save that does + // not cross Restore. + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(&MBB); + Visited.insert(&MBB); + do { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + // If we do not reach Restore and still reach Save, this + // means MBB is before Save. + if (CurBB == Save) + return true; + if (CurBB == Restore) + continue; + // Enqueue all the successors not already visited. 
+ for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } while (!WorkList.empty()); + return false; +} + unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); Index: lib/CodeGen/Passes.cpp =================================================================== --- lib/CodeGen/Passes.cpp +++ lib/CodeGen/Passes.cpp @@ -52,7 +52,10 @@ static cl::opt DisableMachineCSE("disable-machine-cse", cl::Hidden, cl::desc("Disable Machine Common Subexpression Elimination")); static cl::opt -OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); +static cl::opt OptimizeRegAlloc( + "optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); static cl::opt DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, @@ -206,10 +209,10 @@ // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), - Impl(nullptr), Initialized(false), DisableVerify(false), - EnableTailMerge(true) { + : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), + Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), + Impl(nullptr), Initialized(false), DisableVerify(false), + EnableTailMerge(true), EnableShrinkWrap(false) { Impl = new PassConfigImpl(); @@ -524,6 +527,8 @@ addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... 
+ if (getEnableShrinkWrap()) + addPass(&ShrinkWrapID); addPass(&PrologEpilogCodeInserterID); /// Add passes that optimize machine instructions after register allocation. @@ -599,6 +604,21 @@ addPass(&DeadMachineInstructionElimID); } +bool TargetPassConfig::getEnableShrinkWrap() const { + switch (EnableShrinkWrapOpt) { + case cl::BOU_UNSET: + return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None; + // If EnableShrinkWrapOpt is set, it takes precedence over whatever the + // target sets. The rationale is that we assume we want to test + // something related to shrink-wrapping. + case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; + } + llvm_unreachable("Invalid shrink-wrapping state"); +} + //===---------------------------------------------------------------------===// /// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -372,6 +372,61 @@ MFI->setCalleeSavedInfo(CSI); } +/// Helper function to update the liveness information for the callee-saved +/// registers. +static void updateLiveness(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Visited will contain all the basic blocks that are in the region + // where the callee saved registers are alive: + // - Anything that is not Save or Restore -> LiveThrough. + // - Save -> LiveIn. + // - Restore -> LiveOut. + // The live-out is not attached to the block, so no need to keep + // Restore in this set. 
+ SmallPtrSet Visited; + SmallVector WorkList; + MachineBasicBlock *Entry = &MF.front(); + MachineBasicBlock *Save = MFI->getSavePoint(); + + if (!Save) + Save = Entry; + + if (Entry != Save) { + WorkList.push_back(Entry); + Visited.insert(Entry); + } + Visited.insert(Save); + + MachineBasicBlock *Restore = MFI->getRestorePoint(); + if (Restore) + // By construction Restore cannot be visited, otherwise it + // means there exists a path to Restore that does not go + // through Save. + WorkList.push_back(Restore); + + while (!WorkList.empty()) { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + if (CurBB == Save) + continue; + // Enqueue all the successors not already visited. + // Those are by construction either before Save or after Restore. + for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } + + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (MachineBasicBlock *MBB : Visited) + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(CSI[i].getReg()); + } +} + /// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function. /// @@ -392,26 +447,31 @@ MachineBasicBlock::iterator I; // Spill using target interface. - I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + MachineBasicBlock *SavePoint = MFI->getSavePoint(); + if (!SavePoint) + SavePoint = EntryBlock; + I = SavePoint->begin(); + if (!TFI->spillCalleeSavedRegisters(*SavePoint, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - // Insert the spill to the stack frame. 
unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), + TII.storeRegToStackSlot(*SavePoint, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); + SmallVector RestoreBlock; + SmallVectorImpl *RetBlocks = &ReturnBlocks; + if (MFI->getRestorePoint()) { + RestoreBlock.push_back(MFI->getRestorePoint()); + RetBlocks = &RestoreBlock; + } // Restore using target interface. - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock *MBB = ReturnBlocks[ri]; + for (MachineBasicBlock *MBB : *RetBlocks) { I = MBB->end(); - --I; // Skip over all terminator instructions, which are part of the return // sequence. @@ -721,21 +781,27 @@ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... - TFI.emitPrologue(Fn); - - // Add epilogue to restore the callee-save registers in each exiting block - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) - TFI.emitEpilogue(Fn, *I); - } + MachineFrameInfo *MFI = Fn.getFrameInfo(); + MachineBasicBlock &PrologueMBB = + MFI->getSavePoint() ? *MFI->getSavePoint() : Fn.front(); + TFI.emitPrologue(Fn, PrologueMBB); + + // Add epilogue to restore the callee-save registers in each exiting block. + if (MFI->getRestorePoint()) + TFI.emitEpilogue(Fn, *MFI->getRestorePoint()); + else + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().isReturn()) + TFI.emitEpilogue(Fn, *I); + } // Emit additional code that is required to support segmented stacks, if // we've been asked for it. 
This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn); + TFI.adjustForSegmentedStacks(Fn, PrologueMBB); // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -743,7 +809,7 @@ // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn); + TFI.adjustForHiPEPrologue(Fn, PrologueMBB); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical Index: lib/CodeGen/ShrinkWrap.cpp =================================================================== --- lib/CodeGen/ShrinkWrap.cpp +++ lib/CodeGen/ShrinkWrap.cpp @@ -0,0 +1,383 @@ +//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for safe point where the prologue and epilogue can be +// inserted. +// The safe point for the prologue (resp. epilogue) is called Save +// (resp. Restore). +// A point is safe for prologue (resp. epilogue) if and only if +// it 1) dominates (resp. post-dominates) all the frame related operations and +// between 2) two executions of the Save (resp. Restore) point there is an +// execution of the Restore (resp. Save) point. +// +// For instance, the following points are safe: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Restore -> Save -> Restore ... 
+// And the following points are not: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// } +// for (int i = 0; i < 10; ++i) { +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore. +// +// This pass also ensures that the safe points are 3) cheaper than the regular +// entry and exit blocks. +// +// Property #1 is ensured via the use of MachineDominatorTree and +// MachinePostDominatorTree. +// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both +// points must be in the same loop. +// Property #3 is ensured via the MachineBlockFrequencyInfo. +// +// If this pass finds points matching all these properties, then +// MachineFrameInfo is updated with that information. +//===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" +// To check for profitability. +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +// For property #1 for Save. +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +// To record the result of the analysis. +#include "llvm/CodeGen/MachineFrameInfo.h" +// For property #2. +#include "llvm/CodeGen/MachineLoopInfo.h" +// For property #1 for Restore. +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +// To know about callee-saved. +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/Debug.h" +// To know about frame setup operation. +#include "llvm/Target/TargetInstrInfo.h" +// To access TargetInstrInfo. +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "shrink-wrap" + +using namespace llvm; + +STATISTIC(NumFunc, "Number of functions"); +STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); +STATISTIC(NumCandidatesDropped, + "Number of shrink-wrapping candidates dropped because of frequency"); + +namespace { +/// \brief Class to determine where the safe points to insert the +/// prologue and epilogue are. 
+/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the +/// shrink-wrapping term for prologue/epilogue placement, this pass +/// does not rely on expensive data-flow analysis. Instead we use the +/// dominance properties and loop information to decide which point +/// are safe for such insertion. +class ShrinkWrap : public MachineFunctionPass { + /// Hold callee-saved information. + RegisterClassInfo RCI; + MachineDominatorTree *MDT; + MachinePostDominatorTree *MPDT; + /// Current safe point found for the prologue. + /// The prologue will be inserted before the first instruction + /// in this basic block. + MachineBasicBlock *Save; + /// Current safe point found for the epilogue. + /// The epilogue will be inserted before the first terminator instruction + /// in this basic block. + MachineBasicBlock *Restore; + /// Hold the information of the basic block frequency. + /// Use to check the profitability of the new points. + MachineBlockFrequencyInfo *MBFI; + /// Hold the loop information. Used to determine if Save and Restore + /// are in the same loop. + MachineLoopInfo *MLI; + /// Frequency of the Entry block. + uint64_t EntryFreq; + /// Current opcode for frame setup. + int FrameSetupOpcode; + /// Current opcode for frame destroy. + int FrameDestroyOpcode; + /// Entry block. + const MachineBasicBlock *Entry; + + /// \brief Check if \p MI uses or defines a callee-saved register or + /// a frame index. If this is the case, this means \p MI must happen + /// after Save and before Restore. + bool useOrDefCSROrFI(const MachineInstr &MI) const; + + /// \brief Update the Save and Restore points such that \p MBB is in + /// the region that is dominated by Save and post-dominated by Restore + /// and Save and Restore still match the safe point definition. + /// Such point may not exist and Save and/or Restore may be null after + /// this call. + void updateSaveRestorePoints(MachineBasicBlock &MBB); + + /// \brief Initialize the pass for \p MF. 
+ void init(MachineFunction &MF) { + RCI.runOnMachineFunction(MF); + MDT = &getAnalysis(); + MPDT = &getAnalysis(); + Save = nullptr; + Restore = nullptr; + MBFI = &getAnalysis(); + MLI = &getAnalysis(); + EntryFreq = MBFI->getEntryFreq(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + Entry = &MF.front(); + + ++NumFunc; + } + + /// Check whether or not Save and Restore points are still interesting for + /// shrink-wrapping. + bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + +public: + static char ID; + + ShrinkWrap() : MachineFunctionPass(ID) { + initializeShrinkWrapPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Shrink Wrapping analysis"; + } + + /// \brief Perform the shrink-wrapping analysis and update + /// the MachineFrameInfo attached to \p MF with the results. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. 
+ +char ShrinkWrap::ID = 0; +char &llvm::ShrinkWrapID = ShrinkWrap::ID; + +INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) + +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { + if (MI.getOpcode() == FrameSetupOpcode || + MI.getOpcode() == FrameDestroyOpcode) { + DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); + return true; + } + for (const MachineOperand &MO : MI.operands()) { + bool UseCSR = false; + if (MO.isReg()) { + unsigned PhysReg = MO.getReg(); + if (!PhysReg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Unallocated register?!"); + UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } + // TODO: Handle regmask more accurately. + // For now, be conservative about them. + if (UseCSR || MO.isFI() || MO.isRegMask()) { + DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() + << "): " << MI << '\n'); + return true; + } + } + return false; +} + +/// \brief Helper function to find the immediate (post) dominator. +template +MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, + DominanceAnalysis &Dom) { + MachineBasicBlock *IDom = &Block; + for (MachineBasicBlock *BB : BBs) { + IDom = Dom.findNearestCommonDominator(IDom, BB); + if (!IDom) + break; + } + return IDom; +} + +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { + // Get rid of the easy cases first. 
+ if (!Save) + Save = &MBB; + else + Save = MDT->findNearestCommonDominator(Save, &MBB); + + if (!Save) { + DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); + return; + } + + if (!Restore) + Restore = &MBB; + else + Restore = MPDT->findNearestCommonDominator(Restore, &MBB); + + // Make sure we would be able to insert the restore code before the + // terminator. + if (Restore == &MBB) { + for (const MachineInstr &Terminator : MBB.terminators()) { + if (!useOrDefCSROrFI(Terminator)) + continue; + // One of the terminator needs to happen before the restore point. + if (MBB.succ_empty()) { + Restore = nullptr; + break; + } + // Look for a restore point that post-dominates all the successors. + // The immediate post-dominator is what we are looking for. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + break; + } + } + + if (!Restore) { + DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); + return; + } + + // Make sure Save and Restore are suitable for shrink-wrapping: + // 1. all path from Save needs to lead to Restore before exiting. + // 2. all path to Restore needs to go through Save from Entry. + // We achieve that by making sure that: + // A. Save dominates Restore. + // B. Restore post-dominates Save. + // C. Save and Restore are in the same loop. + bool SaveDominatesRestore = false; + bool RestorePostDominatesSave = false; + while (Save && Restore && + (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || + !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || + MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) { + // Fix (A). + if (!SaveDominatesRestore) { + Save = MDT->findNearestCommonDominator(Save, Restore); + continue; + } + // Fix (B). + if (!RestorePostDominatesSave) + Restore = MPDT->findNearestCommonDominator(Restore, Save); + + // Fix (C). 
+ if (Save && Restore && Save != Restore && + MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) + // Push Save outside of this loop. + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + else + // Push Restore outside of this loop. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + } + } +} + +bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { + if (MF.empty()) + return false; + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + init(MF); + + for (MachineBasicBlock &MBB : MF) { + DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() + << '\n'); + + for (const MachineInstr &MI : MBB) { + if (!useOrDefCSROrFI(MI)) + continue; + // Save (resp. restore) point must dominate (resp. post dominate) + // MI. Look for the proper basic block for those. + updateSaveRestorePoints(MBB); + // If we are at a point where we cannot improve the placement of + // save/restore instructions, just give up. + if (!ArePointsInteresting()) { + DEBUG(dbgs() << "No Shrink wrap candidate found\n"); + return false; + } + // No need to look for other instructions, this basic block + // will already be part of the handled region. + break; + } + } + if (!ArePointsInteresting()) { + // If the points are not interesting at this point, then they must be null + // because it means we did not encounter any frame/CSR related code. + // Otherwise, we would have returned from the previous loop. 
+ assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); + DEBUG(dbgs() << "Nothing to shrink-wrap\n"); + return false; + } + + DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq + << '\n'); + + do { + DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " + << Save->getNumber() << ' ' << Save->getName() << ' ' + << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " + << Restore->getNumber() << ' ' << Restore->getName() << ' ' + << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); + + bool IsSaveCheap; + if ((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && + EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) + break; + DEBUG(dbgs() << "New points are too expensive\n"); + MachineBasicBlock *NewBB; + if (!IsSaveCheap) { + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (!Save) + break; + NewBB = Save; + } else { + // Restore is expensive. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + if (!Restore) + break; + NewBB = Restore; + } + updateSaveRestorePoints(*NewBB); + } while (Save && Restore); + + if (!ArePointsInteresting()) { + ++NumCandidatesDropped; + return false; + } + + DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() + << ' ' << Save->getName() << "\nRestore: " + << Restore->getNumber() << ' ' << Restore->getName() << '\n'); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setSavePoint(Save); + MFI->setRestorePoint(Restore); + ++NumCandidates; + return false; +} Index: lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.h +++ lib/Target/AArch64/AArch64FrameLowering.h @@ -34,7 +34,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -250,8 +250,8 @@ } } -void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. +void AArch64FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); @@ -530,15 +530,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); const AArch64InstrInfo *TII = static_cast(MF.getSubtarget().getInstrInfo()); const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); - DebugLoc DL = MBBI->getDebugLoc(); - unsigned RetOpcode = MBBI->getOpcode(); - + DebugLoc DL; + unsigned RetOpcode = 0; + if (MBB.end() != MBBI) { + DL = MBBI->getDebugLoc(); + RetOpcode = MBBI->getOpcode(); + } int NumBytes = MFI->getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo(); @@ -595,7 +597,7 @@ unsigned NumRestores = 0; // Move past the restores of the callee-saved registers. 
- MachineBasicBlock::iterator LastPopI = MBBI; + MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (LastPopI != MBB.begin()) { do { Index: lib/Target/ARM/ARMFrameLowering.h =================================================================== --- lib/Target/ARM/ARMFrameLowering.h +++ lib/Target/ARM/ARMFrameLowering.h @@ -28,7 +28,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const; @@ -55,7 +55,8 @@ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const override; - void adjustForSegmentedStacks(MachineFunction &MF) const override; + void adjustForSegmentedStacks(MachineFunction &MF, + MachineBasicBlock &MBB) const override; private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -278,8 +278,9 @@ } } -void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void ARMFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -1861,7 +1862,8 @@ // ARM can be found at [1]. 
// // [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S -void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { +void ARMFrameLowering::adjustForSegmentedStacks( + MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { unsigned Opcode; unsigned CFIIndex; const ARMSubtarget *ST = &MF.getSubtarget(); @@ -1874,7 +1876,7 @@ if (!ST->isTargetAndroid() && !ST->isTargetLinux()) report_fatal_error("Segmented stacks not supported on this platform."); - MachineBasicBlock &prologueMBB = MF.front(); + assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); @@ -1902,8 +1904,8 @@ MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); - for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), - e = prologueMBB.livein_end(); + for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(), + e = PrologueMBB.livein_end(); i != e; ++i) { AllocMBB->addLiveIn(*i); GetMBB->addLiveIn(*i); @@ -2156,7 +2158,7 @@ .addCFIIndex(CFIIndex); // Organizing MBB lists - PostStackMBB->addSuccessor(&prologueMBB); + PostStackMBB->addSuccessor(&PrologueMBB); AllocMBB->addSuccessor(PostStackMBB); Index: lib/Target/ARM/Thumb1FrameLowering.h =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.h +++ lib/Target/ARM/Thumb1FrameLowering.h @@ -27,7 +27,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -82,8 +82,9 @@ MBB.erase(I); } -void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); Index: lib/Target/BPF/BPFFrameLowering.h =================================================================== --- lib/Target/BPF/BPFFrameLowering.h +++ lib/Target/BPF/BPFFrameLowering.h @@ -24,7 +24,7 @@ explicit BPFFrameLowering(const BPFSubtarget &sti) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {} - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; Index: lib/Target/BPF/BPFFrameLowering.cpp =================================================================== --- lib/Target/BPF/BPFFrameLowering.cpp +++ lib/Target/BPF/BPFFrameLowering.cpp @@ -23,7 +23,8 @@ bool BPFFrameLowering::hasFP(const MachineFunction &MF) const { return true; } -void BPFFrameLowering::emitPrologue(MachineFunction &MF) const {} +void BPFFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} void 
BPFFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {} Index: lib/Target/Hexagon/HexagonFrameLowering.h =================================================================== --- lib/Target/Hexagon/HexagonFrameLowering.h +++ lib/Target/Hexagon/HexagonFrameLowering.h @@ -26,7 +26,7 @@ explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0, 1, true) {} - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool targetHandlesStackFrameRounding() const override { return true; Index: lib/Target/Hexagon/HexagonFrameLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonFrameLowering.cpp +++ lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -188,8 +188,9 @@ return Max; } -void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void HexagonFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); MachineBasicBlock::iterator MBBI = MBB.begin(); Index: lib/Target/MSP430/MSP430FrameLowering.h =================================================================== --- lib/Target/MSP430/MSP430FrameLowering.h +++ lib/Target/MSP430/MSP430FrameLowering.h @@ -27,7 +27,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, Index: lib/Target/MSP430/MSP430FrameLowering.cpp =================================================================== --- lib/Target/MSP430/MSP430FrameLowering.cpp +++ lib/Target/MSP430/MSP430FrameLowering.cpp @@ -39,8 +39,9 @@ return !MF.getFrameInfo()->hasVarSizedObjects(); } -void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB +void MSP430FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MSP430MachineFunctionInfo *MSP430FI = MF.getInfo(); const MSP430InstrInfo &TII = Index: lib/Target/Mips/Mips16FrameLowering.h =================================================================== --- lib/Target/Mips/Mips16FrameLowering.h +++ lib/Target/Mips/Mips16FrameLowering.h @@ -23,7 +23,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/Mips/Mips16FrameLowering.cpp =================================================================== --- lib/Target/Mips/Mips16FrameLowering.cpp +++ lib/Target/Mips/Mips16FrameLowering.cpp @@ -32,8 +32,9 @@ Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI) : MipsFrameLowering(STI, STI.stackAlignment()) {} -void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void Mips16FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); const Mips16InstrInfo &TII = *static_cast(STI.getInstrInfo()); Index: lib/Target/Mips/MipsSEFrameLowering.h =================================================================== --- lib/Target/Mips/MipsSEFrameLowering.h +++ lib/Target/Mips/MipsSEFrameLowering.h @@ -24,7 +24,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/Mips/MipsSEFrameLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEFrameLowering.cpp +++ lib/Target/Mips/MipsSEFrameLowering.cpp @@ -364,8 +364,9 @@ MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI) : MipsFrameLowering(STI, STI.stackAlignment()) {} -void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); Index: lib/Target/NVPTX/NVPTXFrameLowering.h =================================================================== --- lib/Target/NVPTX/NVPTXFrameLowering.h +++ lib/Target/NVPTX/NVPTXFrameLowering.h @@ -23,7 +23,7 @@ explicit NVPTXFrameLowering(); bool hasFP(const MachineFunction &MF) const override; - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void Index: lib/Target/NVPTX/NVPTXFrameLowering.cpp =================================================================== --- lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -31,9 +31,10 @@ bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } -void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { +void NVPTXFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { if (MF.getFrameInfo()->hasStackObjects()) { 
- MachineBasicBlock &MBB = MF.front(); + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); // Insert "mov.u32 %SP, %Depot" MachineBasicBlock::iterator MBBI = MBB.begin(); // This instruction really occurs before first instruction Index: lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp =================================================================== --- lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -68,7 +68,7 @@ } // Add function prolog/epilog - TFI.emitPrologue(MF); + TFI.emitPrologue(MF, MF.front()); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue Index: lib/Target/PowerPC/PPCFrameLowering.h =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.h +++ lib/Target/PowerPC/PPCFrameLowering.h @@ -38,7 +38,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -555,8 +555,9 @@ } } -void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB +void PPCFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = Index: lib/Target/R600/AMDGPUFrameLowering.h =================================================================== --- lib/Target/R600/AMDGPUFrameLowering.h +++ lib/Target/R600/AMDGPUFrameLowering.h @@ -37,7 +37,7 @@ int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override; - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; }; Index: lib/Target/R600/AMDGPUFrameLowering.cpp =================================================================== --- lib/Target/R600/AMDGPUFrameLowering.cpp +++ lib/Target/R600/AMDGPUFrameLowering.cpp @@ -99,9 +99,8 @@ NumEntries = 0; return nullptr; } -void -AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const { -} +void AMDGPUFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} void 
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { Index: lib/Target/Sparc/SparcFrameLowering.h =================================================================== --- lib/Target/Sparc/SparcFrameLowering.h +++ lib/Target/Sparc/SparcFrameLowering.h @@ -26,7 +26,7 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void Index: lib/Target/Sparc/SparcFrameLowering.cpp =================================================================== --- lib/Target/Sparc/SparcFrameLowering.cpp +++ lib/Target/Sparc/SparcFrameLowering.cpp @@ -82,10 +82,11 @@ .addReg(SP::O6).addReg(SP::G1); } -void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { +void SparcFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { SparcMachineFunctionInfo *FuncInfo = MF.getInfo(); - MachineBasicBlock &MBB = MF.front(); + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFI = MF.getFrameInfo(); const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); Index: lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- lib/Target/SystemZ/SystemZFrameLowering.h +++ lib/Target/SystemZ/SystemZFrameLowering.h @@ -40,7 +40,7 @@ override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; 
Index: lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZFrameLowering.cpp +++ lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -309,8 +309,9 @@ } } -void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); +void SystemZFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineFrameInfo *MFFrame = MF.getFrameInfo(); auto *ZII = static_cast(MF.getSubtarget().getInstrInfo()); Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -35,12 +35,14 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void adjustForSegmentedStacks(MachineFunction &MF) const override; + void adjustForSegmentedStacks(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const override; - void adjustForHiPEPrologue(MachineFunction &MF) const override; + void adjustForHiPEPrologue(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const override; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = nullptr) const override; Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -565,8 +565,9 @@ - for 32-bit code, substitute %e?? registers for %r?? */ -void X86FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 
+void X86FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); @@ -1590,9 +1591,10 @@ // limit. static const uint64_t kSplitStackAvailable = 256; -void -X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { - MachineBasicBlock &prologueMBB = MF.front(); +void X86FrameLowering::adjustForSegmentedStacks( + MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { + assert(&PrologueMBB == &MF.front() && + "Shrink-wrapping is not implemented yet"); MachineFrameInfo *MFI = MF.getFrameInfo(); const X86Subtarget &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); @@ -1634,8 +1636,9 @@ // The MOV R10, RAX needs to be in a different block, since the RET we emit in // allocMBB needs to be last (terminating) instruction. - for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), - e = prologueMBB.livein_end(); i != e; i++) { + for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(), + e = PrologueMBB.livein_end(); + i != e; i++) { allocMBB->addLiveIn(*i); checkMBB->addLiveIn(*i); } @@ -1749,7 +1752,7 @@ // This jump is taken if SP >= (Stacklet Limit + Stack Space required). // It jumps to normal execution of the function body. - BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB); + BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB); // On 32 bit we first push the arguments size and then the frame size. On 64 // bit, we pass the stack frame size in r10 and the argument size in r11. 
@@ -1816,10 +1819,10 @@ else BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); - allocMBB->addSuccessor(&prologueMBB); + allocMBB->addSuccessor(&PrologueMBB); checkMBB->addSuccessor(allocMBB); - checkMBB->addSuccessor(&prologueMBB); + checkMBB->addSuccessor(&PrologueMBB); #ifdef XDEBUG MF.verify(); @@ -1841,7 +1844,8 @@ /// call inc_stack # doubles the stack space /// temp0 = sp - MaxStack /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart -void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { +void X86FrameLowering::adjustForHiPEPrologue( + MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { const X86Subtarget &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1910,12 +1914,14 @@ // If the stack frame needed is larger than the guaranteed then runtime checks // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. if (MaxStack > Guaranteed) { - MachineBasicBlock &prologueMBB = MF.front(); + assert(&PrologueMBB == &MF.front() && + "Shrink-wrapping is not implemented yet"); MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); - for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(), - E = prologueMBB.livein_end(); I != E; I++) { + for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(), + E = PrologueMBB.livein_end(); + I != E; I++) { stackCheckMBB->addLiveIn(*I); incStackMBB->addLiveIn(*I); } @@ -1951,7 +1957,7 @@ // SPLimitOffset is in a fixed heap location (pointed by BP). addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) .addReg(ScratchReg), PReg, false, SPLimitOffset); - BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB); + BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB); // Create new MBB for IncStack: BuildMI(incStackMBB, DL, TII.get(CALLop)). 
@@ -1962,9 +1968,9 @@ .addReg(ScratchReg), PReg, false, SPLimitOffset); BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB); - stackCheckMBB->addSuccessor(&prologueMBB, 99); + stackCheckMBB->addSuccessor(&PrologueMBB, 99); stackCheckMBB->addSuccessor(incStackMBB, 1); - incStackMBB->addSuccessor(&prologueMBB, 99); + incStackMBB->addSuccessor(&PrologueMBB, 99); incStackMBB->addSuccessor(incStackMBB, 1); } #ifdef XDEBUG Index: lib/Target/XCore/XCoreFrameLowering.h =================================================================== --- lib/Target/XCore/XCoreFrameLowering.h +++ lib/Target/XCore/XCoreFrameLowering.h @@ -27,7 +27,8 @@ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; Index: lib/Target/XCore/XCoreFrameLowering.cpp =================================================================== --- lib/Target/XCore/XCoreFrameLowering.cpp +++ lib/Target/XCore/XCoreFrameLowering.cpp @@ -220,8 +220,9 @@ MF.getFrameInfo()->hasVarSizedObjects(); } -void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB +void XCoreFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo *MMI = &MF.getMMI(); Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -0,0 +1,502 @@ +; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE 
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios" + + +; Initial motivating example: Simple diamond with a call just on one side. +; CHECK-LABEL: foo: +; +; Compare the arguments and jump to exit. +; No prologue needed. +; ENABLE: cmp w0, w1 +; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #-16]! +; CHECK-NEXT: mov [[SAVE_SP]], sp +; CHECK-NEXT: sub sp, sp, #16 +; +; Compare the arguments and jump to exit. +; After the prologue is set. +; DISABLE: cmp w0, w1 +; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Store %a in the alloca. +; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4] +; Set the alloca address in the second argument. +; CHECK-NEXT: sub x1, [[SAVE_SP]], #4 +; Set the first argument to zero. +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: bl _doSomething +; +; Without shrink-wrapping, epilogue is in the exit block. +; DISABLE: [[EXIT_LABEL]]: +; Epilogue code. +; CHECK-NEXT: mov sp, [[SAVE_SP]] +; CHECK-NEXT: ldp [[SAVE_SP]], [[CSR]], [sp], #16 +; +; With shrink-wrapping, exit block is a simple return. +; ENABLE: [[EXIT_LABEL]]: +; CHECK-NEXT: ret +define i32 @foo(i32 %a, i32 %b) { + %tmp = alloca i32, align 4 + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + %tmp4 = call i32 @doSomething(i32 0, i32* %tmp) + br label %false + +false: + %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] + ret i32 %tmp.0 +} + +; Function Attrs: optsize +declare i32 @doSomething(i32, i32*) + + +; Check that we do not perform the restore inside the loop whereas the save +; is outside. +; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: +; +; Shrink-wrapping allows to skip the prologue in the else case. +; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. 
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! +; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] +; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 +; +; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: mov [[SUM:w[0-9]+]], wzr +; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa +; +; Next BB. +; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body +; CHECK: bl _something +; CHECK-NEXT: add [[SUM]], w0, [[SUM]] +; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: cbnz [[IV]], [[LOOP]] +; +; Next BB. +; Copy SUM into the returned register + << 3. +; CHECK: lsl w0, [[SUM]], #3 +; +; Jump to epilogue. +; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] +; +; DISABLE: [[ELSE_LABEL]]: ; %if.else +; Shift second argument by one and store into returned register. +; DISABLE: lsl w0, w1, #1 +; DISABLE: [[EPILOG_BB]]: ; %if.end +; +; Epilogue code. +; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16] +; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 +; CHECK-NEXT: ret +; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; Shift second argument by one and store into returned register. +; ENABLE: lsl w0, w1, #1 +; ENABLE: ret +define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare i32 @something(...) 
+ +; Check that we do not perform the shrink-wrapping inside the loop even +; though that would be legal. The cost model must prevent that. +; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2: +; Prologue code. +; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! +; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] +; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 +; CHECK: mov [[SUM:w[0-9]+]], wzr +; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa +; Next BB. +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body +; CHECK: bl _something +; CHECK-NEXT: add [[SUM]], w0, [[SUM]] +; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; Next BB. +; CHECK: ; %for.end +; CHECK: mov w0, [[SUM]] +; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16] +; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 +; CHECK-NEXT: ret +define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sum.03 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() + %add = add nsw i32 %call, %sum.03 + %inc = add nuw nsw i32 %i.04, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 %add +} + +; Check with a more complex case that we do not have save within the loop and +; restore outside. +; CHECK-LABEL: loopInfoSaveOutsideLoop: +; +; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! 
+; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] +; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 +; +; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: mov [[SUM:w[0-9]+]], wzr +; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa +; +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body +; CHECK: bl _something +; CHECK-NEXT: add [[SUM]], w0, [[SUM]] +; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; Next BB. +; CHECK: bl _somethingElse +; CHECK-NEXT: lsl w0, [[SUM]], #3 +; +; Jump to epilogue. +; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] +; +; DISABLE: [[ELSE_LABEL]]: ; %if.else +; Shift second argument by one and store into returned register. +; DISABLE: lsl w0, w1, #1 +; DISABLE: [[EPILOG_BB]]: ; %if.end +; Epilogue code. +; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16] +; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 +; CHECK-NEXT: ret +; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; Shift second argument by one and store into returned register. +; ENABLE: lsl w0, w1, #1 +; ENABLE: ret +define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + tail call void bitcast (void (...)* @somethingElse to void ()*)() + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare void @somethingElse(...) 
+ +; Check with a more complex case that we do not have restore within the loop and +; save outside. +; CHECK-LABEL: loopInfoRestoreOutsideLoop: +; +; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]! +; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16] +; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16 +; +; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: bl _somethingElse +; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr +; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa +; +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body +; CHECK: bl _something +; CHECK-NEXT: add [[SUM]], w0, [[SUM]] +; CHECK-NEXT: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; Next BB. +; CHECK: lsl w0, [[SUM]], #3 +; +; Jump to epilogue. +; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]] +; +; DISABLE: [[ELSE_LABEL]]: ; %if.else +; Shift second argument by one and store into returned register. +; DISABLE: lsl w0, w1, #1 +; DISABLE: [[EPILOG_BB]]: ; %if.end +; Epilogue code. +; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16] +; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32 +; CHECK-NEXT: ret +; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; Shift second argument by one and store into returned register. 
+; ENABLE: lsl w0, w1, #1 +; ENABLE: ret +define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void bitcast (void (...)* @somethingElse to void ()*)() + br label %for.body + +for.body: ; preds = %for.body, %if.then + %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] + %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] + %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() + %add = add nsw i32 %call, %sum.04 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %shl = shl i32 %add, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +; Check that we handle function with no frame information correctly. +; CHECK-LABEL: emptyFrame: +; CHECK: ; %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +define i32 @emptyFrame() { +entry: + ret i32 0 +} + +; Check that we handle variadic function correctly. +; CHECK-LABEL: variadicFunc: +; +; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: sub sp, sp, #16 +; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Sum is merged with the returned register. 
+; CHECK: mov [[SUM:w0]], wzr +; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16 +; CHECK-NEXT: str [[VA_BASE]], [sp, #8] +; CHECK-NEXT: cmp w1, #1 +; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]] +; +; Loop body: load the next variadic argument and accumulate it into the sum. +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body +; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8] +; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8 +; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8] +; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]] +; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]] +; CHECK-NEXT: sub w1, w1, #1 +; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]] +; +; DISABLE-NEXT: b [[IFEND_LABEL]] +; DISABLE: [[ELSE_LABEL]]: ; %if.else +; DISABLE: lsl w0, w1, #1 +; +; CHECK: [[IFEND_LABEL]]: +; Epilogue code. +; CHECK: add sp, sp, #16 +; CHECK-NEXT: ret +; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; ENABLE: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; If %cond is non-zero, sum the first %count variadic i32 arguments (nothing is +; summed when %count is not positive). Otherwise return %count shifted left by 1. +define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 { +entry: + %ap = alloca i8*, align 8 + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %cmp6 = icmp sgt i32 %count, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %if.then, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %if.then ] + %sum.07 = phi i32 [ %add, %for.body ], [ 0, %if.then ] + %0 = va_arg i8** %ap, i32 + %add = add nsw i32 %sum.07, %0 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %count + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %if.then + %sum.0.lcssa = phi i32 [ 0, %if.then ], [ %add, %for.body ] + call void @llvm.va_end(i8* %ap1) + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %count, 1 + br label %if.end + +if.end: ; preds = %if.else, %for.end + %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %mul, %if.else ] + ret i32 %sum.1 +} + +declare void @llvm.va_start(i8*) + +declare void @llvm.va_end(i8*) + +; Check that
we handle inline asm correctly. +; CHECK-LABEL: inlineAsm: +; +; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; Make sure we save the CSR used in the inline asm: x19. +; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]! +; +; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; CHECK: movz [[IV:w[0-9]+]], #0xa +; +; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body +; Inline asm statement. +; CHECK: add x19, x19, #1 +; CHECK: sub [[IV]], [[IV]], #1 +; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]] +; Next BB. +; CHECK: mov w0, wzr +; Epilogue code. +; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 +; CHECK-NEXT: ret +; Next BB. +; CHECK: [[ELSE_LABEL]]: ; %if.else +; CHECK-NEXT: lsl w0, w1, #1 +; Epilogue code. +; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 +; CHECK-NEXT: ret +; +; If %cond is non-zero, execute the inline asm (which clobbers x19) ten times +; and return 0. Otherwise return %N shifted left by 1. +define i32 @inlineAsm(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %for.body + +for.body: ; preds = %entry, %for.body + %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + tail call void asm sideeffect "add x19, x19, #1", "~{x19}"() + %inc = add nuw nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, 10 + br i1 %exitcond, label %if.end, label %for.body + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %for.body, %if.else + %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.body ] + ret i32 %sum.0 +} + +; Check that we handle calls to variadic functions correctly. +; CHECK-LABEL: callVariadicFunc: +; +; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-16]! +; CHECK-NEXT: mov [[NEW_SP:x[0-9]+]], sp +; CHECK-NEXT: sub sp, sp, #48 +; +; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] +; Setup of the varargs.
+; The six variadic arguments are stored to the stack and the fixed argument is +; moved into w0. +; CHECK: stp x1, x1, [sp, #32] +; CHECK-NEXT: stp x1, x1, [sp, #16] +; CHECK-NEXT: stp x1, x1, [sp] +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: bl _someVariadicFunc +; CHECK-NEXT: lsl w0, w0, #3 +; +; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]] +; DISABLE: [[ELSE_LABEL]]: ; %if.else +; DISABLE-NEXT: lsl w0, w1, #1 +; DISABLE: [[IFEND_LABEL]]: ; %if.end +; +; Epilogue code. +; CHECK: mov sp, [[NEW_SP]] +; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 +; CHECK-NEXT: ret +; +; ENABLE: [[ELSE_LABEL]]: ; %if.else +; ENABLE-NEXT: lsl w0, w1, #1 +; ENABLE-NEXT: ret +; +; If %cond is non-zero, call @someVariadicFunc with %N passed seven times and +; return the result shifted left by 3. Otherwise return %N shifted left by 1. +define i32 @callVariadicFunc(i32 %cond, i32 %N) { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N) + %shl = shl i32 %call, 3 + br label %if.end + +if.else: ; preds = %entry + %mul = shl nsw i32 %N, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ] + ret i32 %sum.0 +} + +declare i32 @someVariadicFunc(i32, ...)