Index: include/llvm-c/Transforms/Scalar.h
===================================================================
--- include/llvm-c/Transforms/Scalar.h
+++ include/llvm-c/Transforms/Scalar.h
@@ -117,6 +117,9 @@
 /** See llvm::createEarlyCSEPass function */
 void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM);
 
+/** See llvm::createHeapToStackPass function */
+void LLVMAddHeapToStackPass(LLVMPassManagerRef PM);
+
 /** See llvm::createLowerExpectIntrinsicPass function */
 void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM);
 
Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -223,6 +223,11 @@
     return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
   }
 
+  /// Returns the natural stack alignment (or 0 if unspecified).
+  unsigned naturalStackAlignment() const {
+    return StackNaturalAlign;
+  }
+
   /// fitsInLegalInteger - This function returns true if the specified type fits
   /// in a native integer type supported by the CPU. For example, if the CPU
   /// only supports i32 as a native integer type, then i27 fits in a legal
Index: include/llvm/IR/IRBuilder.h
===================================================================
--- include/llvm/IR/IRBuilder.h
+++ include/llvm/IR/IRBuilder.h
@@ -832,6 +832,13 @@
                            const Twine &Name = "") {
     return Insert(new AllocaInst(Ty, ArraySize), Name);
   }
+  AllocaInst *CreateAlignedAlloca(Type *Ty, unsigned Align,
+                                  Value *ArraySize = 0,
+                                  const Twine &Name = "") {
+    AllocaInst *AI = CreateAlloca(Ty, ArraySize, Name);
+    AI->setAlignment(Align);
+    return AI;
+  }
   // \brief Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
   // converting the string to 'bool' for the isVolatile parameter.
   LoadInst *CreateLoad(Value *Ptr, const char *Name) {
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -123,6 +123,7 @@
 void initializeThreadSanitizerPass(PassRegistry&);
 void initializeDataFlowSanitizerPass(PassRegistry&);
 void initializeEarlyCSEPass(PassRegistry&);
+void initializeHeapToStackPass(PassRegistry&);
 void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeFindUsedTypesPass(PassRegistry&);
 void initializeFunctionAttrsPass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -140,6 +140,7 @@
       (void) llvm::createInstCountPass();
       (void) llvm::createCodeGenPreparePass();
       (void) llvm::createEarlyCSEPass();
+      (void) llvm::createHeapToStackPass();
       (void) llvm::createGVNPass();
       (void) llvm::createMemCpyOptPass();
       (void) llvm::createLoopDeletionPass();
Index: include/llvm/Transforms/IPO/PassManagerBuilder.h
===================================================================
--- include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -106,6 +106,7 @@
   bool SLPVectorize;
   bool LoopVectorize;
   bool LateVectorize;
+  bool HeapToStackPromotion;
 
 private:
   /// ExtensionList - This is list of all of the extensions that are registered.
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -289,6 +289,12 @@
 //===----------------------------------------------------------------------===//
 //
+// HeapToStack - This pass performs heap-to-stack allocation promotion.
+//
+FunctionPass *createHeapToStackPass();
+
+//===----------------------------------------------------------------------===//
+//
 // MemCpyOpt - This pass performs optimizations related to eliminating memcpy
 // calls and/or combining multiple stores into memset's.
 //
Index: include/llvm/Transforms/Utils/BasicBlockUtils.h
===================================================================
--- include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -205,6 +205,13 @@
 TerminatorInst *SplitBlockAndInsertIfThen(Instruction *Cmp,
    bool Unreachable, MDNode *BranchWeights = 0);
 
+/// Same as SplitBlockAndInsertIfThen, but the first parameter provides the
+/// split point (the first instruction in the new tail block) while the
+/// second provides the condition. This is useful, for example, if Cmp is a
+/// PHI node.
+TerminatorInst *SplitBlockAndInsertIfThenAt(Instruction *SplitBefore,
+    Value *Cmp, bool Unreachable, MDNode *BranchWeights = 0);
+
 ///
 /// GetIfCondition - Check whether BB is the merge point of a if-region.
 /// If so, return the boolean condition that determines which entry into
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -54,6 +54,10 @@
   cl::init(true), cl::Hidden,
   cl::desc("Enable the new, experimental SROA pass"));
 
+static cl::opt<bool> RunHeapToStack("heap-to-stack-promotion",
+  cl::init(true), cl::Hidden,
+  cl::desc("Run the heap-to-stack promotion pass"));
+
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
@@ -65,6 +69,7 @@
     SLPVectorize = RunSLPVectorization;
     LoopVectorize = RunLoopVectorization;
     LateVectorize = LateVectorization;
+    HeapToStackPromotion = RunHeapToStack;
 }
 
 PassManagerBuilder::~PassManagerBuilder() {
@@ -116,6 +121,8 @@
   addInitialAliasAnalysisPasses(FPM);
 
   FPM.add(createCFGSimplificationPass());
+  if (HeapToStackPromotion)
+    FPM.add(createHeapToStackPass());
   if (UseNewSROA)
     FPM.add(createSROAPass());
   else
@@ -173,6 +180,9 @@
     MPM.add(createArgumentPromotionPass());   // Scalarize uninlined fn args
 
   // Start of function pass.
+  // Replace heap with stack allocations.
+  if (HeapToStackPromotion)
+    MPM.add(createHeapToStackPass());
   // Break up aggregate allocas, using SSAUpdater.
   if (UseNewSROA)
     MPM.add(createSROAPass(/*RequiresDomTree*/ false));
@@ -195,6 +205,10 @@
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
+  // Replace heap with stack allocations.
+  if (HeapToStackPromotion)
+    MPM.add(createHeapToStackPass());
+
   if (!LateVectorize && LoopVectorize)
     MPM.add(createLoopVectorizePass(DisableUnrollLoops));
 
@@ -319,6 +333,10 @@
   PM.add(createInstructionCombiningPass());
   PM.add(createJumpThreadingPass());
 
+  // Replace heap with stack allocations.
+  if (HeapToStackPromotion)
+    PM.add(createHeapToStackPass());
+
   // Break up allocas
   if (UseNewSROA)
     PM.add(createSROAPass());
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -8,6 +8,7 @@
   EarlyCSE.cpp
   GlobalMerge.cpp
   GVN.cpp
+  HeapToStack.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
   LICM.cpp
Index: lib/Transforms/Scalar/HeapToStack.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/HeapToStack.cpp
@@ -0,0 +1,606 @@
+//===- HeapToStack.cpp - Code to perform heap-to-stack promotion ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements heap-to-stack allocation promotion.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "heaptostack"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumMallocToAlloca, "Number of heap allocations removed");
+STATISTIC(NumFreeDeleted, "Number of deallocation calls removed");
+
+static cl::opt<unsigned>
+MaxConstSize("max-heap-to-stack-size", cl::init(1024), cl::Hidden,
+  cl::desc("The maximum allocation size for heap-to-stack promotion"));
+
+static cl::opt<bool>
+NoDynamic("no-dynamic-heap-to-stack", cl::init(false), cl::Hidden,
+  cl::desc("Do not create dynamic stack allocations"));
+
+static cl::opt<bool>
+NullFree("heap-to-stack-null-free", cl::init(false), cl::Hidden,
+  cl::desc("Conditionally elide frees by passing NULL to free"));
+
+namespace {
+  struct MallocCaptureTracker : public CaptureTracker {
+    MallocCaptureTracker(TargetLibraryInfo *T) : Captured(false), TLI(T) {}
+
+    void tooManyUses() LLVM_OVERRIDE { Captured = true; }
+
+    bool shouldExplore(Use *U) LLVM_OVERRIDE {
+      Value *V = U->getUser();
+      if (isa<CallInst>(V) || isa<InvokeInst>(V))
+        UsesMalloc.insert(V);
+      return true;
+    }
+
+    bool captured(Use *U) LLVM_OVERRIDE {
+      if (isFreeCall(U->getUser(), TLI))
+        return false;
+      Captured = true;
+      return true;
+    }
+
+    bool Captured;
+    SmallPtrSet<Value *, 16> UsesMalloc;
+    TargetLibraryInfo *TLI;
+  };
+
+  struct HeapToStack : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    HeapToStack() : FunctionPass(ID) {
+      initializeHeapToStackPass(*PassRegistry::getPassRegistry());
+    }
+
+    AliasAnalysis *AA;
+    const DataLayout *DL;
+    DominatorTree *DT;
+    TargetLibraryInfo *TLI;
+
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<TargetLibraryInfo>();
+    }
+
+  private:
+    void collectPossibleMallocs(Function &F,
+           SmallSet<Instruction *, 16> &PossibleMallocs,
+           unsigned &FuncFreeCnt);
+    void verifyPossibleMallocs(
+           SmallSet<Instruction *, 16> &PossibleMallocs,
+           SmallSet<Instruction *, 16> &VerifiedMallocs,
+           DenseMap<Instruction *, SmallSet<Instruction *, 4> > &FreeMallocs,
+           SmallSet<BasicBlock *, 32> &RelevantBlocks);
+    void collectAliasingTailCalls(
+           SmallSet<BasicBlock *, 32> &RelevantBlocks,
+           SmallSet<Instruction *, 16> &VerifiedMallocs,
+           SmallSet<CallInst *, 16> &TailCalls);
+    void determineAmbiguousFrees(
+           SmallSet<Instruction *, 16> &VerifiedMallocs,
+           DenseMap<Instruction *, SmallSet<Instruction *, 4> > &FreeMallocs,
+           SmallSet<Instruction *, 8> &AmbiguousFrees);
+  };
+}
+
+char HeapToStack::ID = 0;
+INITIALIZE_PASS_BEGIN(HeapToStack, "heaptostack",
+                "Heap-to-stack promotion", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(HeapToStack, "heaptostack",
+                "Heap-to-stack promotion", false, false)
+
+FunctionPass *llvm::createHeapToStackPass() {
+  return new HeapToStack();
+}
+
+// Find calls to malloc that have a known small size.
+void HeapToStack::collectPossibleMallocs(Function &F,
+       SmallSet<Instruction *, 16> &PossibleMallocs, unsigned &FuncFreeCnt) {
+
+  FuncFreeCnt = 0;
+  for (Function::iterator I = F.begin(), IE = F.end(); I != IE; ++I) {
+    if (!DT->isReachableFromEntry(I))
+      continue;
+
+    for (BasicBlock::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
+      // FIXME: extractMallocCall is like isMallocLikeFn, but ignores
+      // InvokeInst, and we use extractMallocCall because getMallocArraySize
+      // and friends currently require CallInst. This should be enhanced.
+      if (CallInst *CI = extractMallocCall(J, TLI)) {
+        // Note: Not using getObjectSize here because it creates instructions.
+        Type *ATy = getMallocAllocatedType(CI, TLI);
+        Value *AS = getMallocArraySize(CI, DL, TLI,
+                                       /* LookThroughSExt */ true);
+        unsigned ATySize = ATy ? DL->getTypeAllocSize(ATy) : 0;
+        if (AS && ATySize) {
+          unsigned MaxAS = MaxConstSize / ATySize;
+          if (!isPowerOf2_32(MaxAS))
+            MaxAS = (unsigned) NextPowerOf2(MaxAS) >> 1u;
+          unsigned MaxASMask = ~(MaxAS - 1u);
+          unsigned ASBits = AS->getType()->getPrimitiveSizeInBits();
+          if (MaskedValueIsZero(AS, APInt(ASBits, MaxASMask), DL) &&
+              (!NoDynamic || isa<ConstantInt>(AS))) {
+            DEBUG(dbgs() << "H2S: Potential malloc " << *CI <<
+                            " of array size " << *AS << " of " << *ATy <<
+                            " (size = " << ATySize << ")\n");
+            PossibleMallocs.insert(CI);
+          }
+        }
+      } else if (isFreeCall(J, TLI))
+        ++FuncFreeCnt;
+  }
+}
+
+// For calls to malloc that might be promotable, find those that actually are
+// promotable. To do this, verify that on all paths from the malloc to all
+// exit blocks, the allocated memory is freed.
+void HeapToStack::verifyPossibleMallocs(
+       SmallSet<Instruction *, 16> &PossibleMallocs,
+       SmallSet<Instruction *, 16> &VerifiedMallocs,
+       DenseMap<Instruction *, SmallSet<Instruction *, 4> > &FreeMallocs,
+       SmallSet<BasicBlock *, 32> &RelevantBlocks) {
+  for (SmallSet<Instruction *, 16>::iterator I = PossibleMallocs.begin(),
+       IE = PossibleMallocs.end(); I != IE; ++I) {
+    bool FoundFree = false;
+    for (BasicBlock::iterator J = llvm::next(BasicBlock::iterator(*I)),
+         JE = (*I)->getParent()->end(); J != JE; ++J) {
+      // If this is a free call of the given malloc, then this control-flow
+      // path allows stack promotion.
+      if (isFreeCall(J, TLI) &&
+          *I == cast<CallInst>(J)->getArgOperand(0)->stripPointerCasts()) {
+        DEBUG(dbgs() << "H2S: Found free " << *J << " for malloc " << **I <<
+                        " in parent basic block\n");
+        FoundFree = true;
+        FreeMallocs[J].insert(*I);
+        VerifiedMallocs.insert(*I);
+        RelevantBlocks.insert((*I)->getParent());
+        break;
+      }
+    }
+
+    if (FoundFree)
+      continue;
+
+    typedef std::pair<BasicBlock *, SmallSet<Instruction *, 4> > BBAndValues;
+    SmallVector<BBAndValues, 8> WorkQueue;
+    // Search all successors; within each successor include all
+    // PHI-translated values.
+    for (succ_iterator J = succ_begin((*I)->getParent()),
+         JE = succ_end((*I)->getParent()); J != JE; ++J) {
+      SmallSet<Instruction *, 4> V;
+      V.insert(*I);
+
+      for (BasicBlock::iterator K = (*J)->begin(),
+           KE = BasicBlock::iterator((*J)->getFirstNonPHI()); K != KE; ++K) {
+        PHINode *P = cast<PHINode>(K);
+        int Idx;
+        if ((Idx = P->getBasicBlockIndex((*I)->getParent())) == -1)
+          continue;
+        if (P->getIncomingValue(Idx)->stripPointerCasts() == *I)
+          V.insert(P);
+      }
+
+      WorkQueue.push_back(BBAndValues(*J, V));
+    }
+
+    if (WorkQueue.empty())
+      continue;
+
+    // Note that this walk is incomplete: it will miss frees of PHIs where
+    // you'd need to walk back several loop iterations (a cyclic chain of
+    // many PHIs) to see that the PHI value really was the malloc. Such cases
+    // seem unlikely to be important in practice, because when such a case
+    // exists, it seems unlikely that no other control-flow path exists on
+    // which the malloc is not freed.
+    bool NotFreed = false;
+    SmallSet<Instruction *, 4> PossibleFrees;
+    DenseSet<BasicBlock *> Visited;
+    while (!WorkQueue.empty()) {
+      BBAndValues B = WorkQueue.pop_back_val();
+      if (!Visited.insert(B.first).second)
+        continue;
+
+      FoundFree = false;
+      for (BasicBlock::iterator J = B.first->begin(), JE = B.first->end();
+           J != JE; ++J) {
+        if (isFreeCall(J, TLI) && B.second.count((Instruction *)
+              cast<CallInst>(J)->getArgOperand(0)->stripPointerCasts())) {
+          DEBUG(dbgs() << "H2S: Found free " << *J << " for malloc " << **I <<
+                          " in basic block " << B.first->getName() << "\n");
+          FoundFree = true;
+          PossibleFrees.insert(J);
+          break;
+        }
+      }
+
+      if (FoundFree)
+        continue;
+
+      bool UnvisitedSucc = false;
+      for (succ_iterator J = succ_begin(B.first), JE = succ_end(B.first);
+           J != JE; ++J) {
+        if (Visited.count(*J))
+          continue;
+
+        SmallSet<Instruction *, 4> V(B.second);
+        for (BasicBlock::iterator K = (*J)->begin(),
+             KE = BasicBlock::iterator((*J)->getFirstNonPHI()); K != KE; ++K) {
+          PHINode *P = cast<PHINode>(K);
+          int Idx;
+          if ((Idx = P->getBasicBlockIndex(B.first)) == -1)
+            continue;
+          if (B.second.count((Instruction *)
+                P->getIncomingValue(Idx)->stripPointerCasts()))
+            V.insert(P);
+        }
+
+        WorkQueue.push_back(BBAndValues(*J, V));
+        UnvisitedSucc = true;
+      }
+
+      if (!UnvisitedSucc) {
+        DEBUG(dbgs() << "H2S: Did not find free for malloc " << **I <<
+                        " and have no unvisited successors of " <<
+                        B.first->getName() << "\n");
+        NotFreed = true;
+        break;
+      }
+    }
+
+    if (!NotFreed) {
+      DEBUG(dbgs() << "H2S: Found free on all paths (" <<
+                      PossibleFrees.size() << " frees in total) for malloc " <<
+                      **I << "\n");
+      for (SmallSet<Instruction *, 4>::iterator J = PossibleFrees.begin(),
+           JE = PossibleFrees.end(); J != JE; ++J)
+        FreeMallocs[*J].insert(*I);
+      VerifiedMallocs.insert(*I);
+      RelevantBlocks.insert(Visited.begin(), Visited.end());
+    }
+  }
+}
+
+// Collect tail calls that alias with the chosen mallocs. We should try hard
+// not to include functions that clearly don't use uncaptured to-be-converted
+// mallocs, because including them pessimizes later alias analysis.
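+//
+// For example (illustrative IR, not taken from the test suite):
+//
+//   %m = call i8* @malloc(i64 16)   ; will become an alloca
+//   tail call void @use(i8* %m)     ; 'tail' marker must be dropped
+//   call void @free(i8* %m)         ; will be deleted
+//
+// Once %m lives on the stack, @use may access its caller's frame, which the
+// 'tail' marker would otherwise permit optimizers to assume cannot happen.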
+void HeapToStack::collectAliasingTailCalls(
+       SmallSet<BasicBlock *, 32> &RelevantBlocks,
+       SmallSet<Instruction *, 16> &VerifiedMallocs,
+       SmallSet<CallInst *, 16> &TailCalls) {
+  // First, figure out which malloc results might be captured, and for those
+  // that are not captured, record their users.
+  SmallSet<Instruction *, 16> CapturedMallocs;
+  SmallPtrSet<Value *, 16> UncapturedMallocUsers;
+  for (SmallSet<Instruction *, 16>::iterator I = VerifiedMallocs.begin(),
+       IE = VerifiedMallocs.end(); I != IE; ++I) {
+    MallocCaptureTracker MCT(TLI);
+    PointerMayBeCaptured(*I, &MCT);
+
+    if (MCT.Captured)
+      CapturedMallocs.insert(*I);
+    else
+      UncapturedMallocUsers.insert(MCT.UsesMalloc.begin(),
+                                   MCT.UsesMalloc.end());
+  }
+
+  for (SmallSet<BasicBlock *, 32>::iterator I = RelevantBlocks.begin(),
+       IE = RelevantBlocks.end(); I != IE; ++I)
+    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
+         J != JE; ++J)
+      if (CallInst *CI = dyn_cast<CallInst>(J)) {
+        if (!CI->isTailCall())
+          continue;
+
+        for (SmallSet<Instruction *, 16>::iterator
+             K = VerifiedMallocs.begin(),
+             KE = VerifiedMallocs.end(); K != KE; ++K) {
+          if (*K == CI || isFreeCall(CI, TLI))
+            continue;
+
+          // If this is not a captured malloc, and this call does not use,
+          // directly or indirectly, an uncaptured malloc, then it can keep
+          // its tail-call marking.
+          if (!CapturedMallocs.count(*K) && !UncapturedMallocUsers.count(CI))
+            continue;
+
+          if (AA->getModRefInfo(CI, AliasAnalysis::Location(*K)) !=
+              AliasAnalysis::NoModRef)
+            TailCalls.insert(CI);
+        }
+      }
+}
+
+// For all frees associated with verified mallocs, identify those frees that
+// might also free other, non-promotable mallocs. These frees cannot simply
+// be removed; instead, they must be elided only when they would otherwise
+// free the promoted allocations.
+void HeapToStack::determineAmbiguousFrees(
+       SmallSet<Instruction *, 16> &VerifiedMallocs,
+       DenseMap<Instruction *, SmallSet<Instruction *, 4> > &FreeMallocs,
+       SmallSet<Instruction *, 8> &AmbiguousFrees) {
+  for (DenseMap<Instruction *, SmallSet<Instruction *, 4> >::iterator I =
+       FreeMallocs.begin(), IE = FreeMallocs.end(); I != IE; ++I) {
+    Value *FAddr = cast<CallInst>(I->first)->getArgOperand(0);
+    assert(isa<Instruction>(FAddr) &&
+           "Free address for promotable malloc not an instruction");
+    FAddr = FAddr->stripPointerCasts();
+
+    if (VerifiedMallocs.count((Instruction *) FAddr)) {
+      DEBUG(dbgs() << "H2S: malloc " << *FAddr <<
+                      " directly paired with free " << *I->first <<
+                      " (skipping backward search)\n");
+      continue;
+    }
+
+    typedef std::pair<BasicBlock *, PHITransAddr> BBAndAddr;
+    typedef std::pair<BasicBlock *, Value *> BBAndValue;
+    SmallVector<BBAndAddr, 8> WorkQueue;
+    // We might visit the same basic block more than once with different
+    // phi-translated values.
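+    // (PHI translation rewrites the free's address in terms of each
+    // predecessor: given %p = phi i8* [ %m1, %bb1 ], [ %m2, %bb2 ], the
+    // address %p becomes %m1 when walking into %bb1 and %m2 into %bb2.)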
+    DenseSet<BBAndValue> Visited;
+
+    BasicBlock *FreeBB = I->first->getParent();
+    PHITransAddr TFAddr(FAddr, DL);
+    if (!TFAddr.IsPotentiallyPHITranslatable()) {
+      DEBUG(dbgs() << "H2S: Terminating backward search from free " <<
+                      *I->first << " at untranslatable PHI " <<
+                      *TFAddr.getAddr() << " in basic block " <<
+                      FreeBB->getName() << "\n");
+      AmbiguousFrees.insert(I->first);
+      continue;
+    }
+
+    for (pred_iterator PI = pred_begin(FreeBB), PE = pred_end(FreeBB);
+         PI != PE; ++PI) {
+      PHITransAddr T = TFAddr;
+      if (T.PHITranslateValue(FreeBB, *PI, /* DT */ 0)) {
+        DEBUG(dbgs() << "H2S: Terminating backward search from free " <<
+                        *I->first << " at untranslatable PHI " <<
+                        *TFAddr.getAddr() << " in basic block " <<
+                        FreeBB->getName() << " to basic block " <<
+                        (*PI)->getName() << "\n");
+        AmbiguousFrees.insert(I->first);
+        WorkQueue.clear();
+        break;
+      }
+
+      WorkQueue.push_back(BBAndAddr(*PI, T));
+    }
+
+    while (!WorkQueue.empty()) {
+      BBAndAddr A = WorkQueue.pop_back_val();
+      if (!Visited.insert(BBAndValue(A.first, A.second.getAddr())).second)
+        continue;
+
+      if (!isa<Instruction>(A.second.getAddr())) {
+        DEBUG(dbgs() << "H2S: Terminating backward search from free " <<
+                        *I->first << " translated PHI " <<
+                        *TFAddr.getAddr() << " in basic block " <<
+                        A.first->getName() << " to non-instruction\n");
+        AmbiguousFrees.insert(I->first);
+        break;
+      }
+
+      if (VerifiedMallocs.count((Instruction *)
+            A.second.getAddr()->stripPointerCasts())) {
+        DEBUG(dbgs() << "H2S: Found malloc " << *A.second.getAddr() <<
+                        " paired with free " << *I->first <<
+                        " in backward search\n");
+        continue;
+      }
+
+      if (!A.second.IsPotentiallyPHITranslatable()) {
+        DEBUG(dbgs() << "H2S: Terminating backward search from free " <<
+                        *I->first << " at untranslatable PHI " <<
+                        *A.second.getAddr() << " in basic block " <<
+                        A.first->getName() << "\n");
+        AmbiguousFrees.insert(I->first);
+        break;
+      }
+
+      pred_iterator PI = pred_begin(A.first), PE = pred_end(A.first);
+      if (PI == PE)
+        AmbiguousFrees.insert(I->first);
+      for (; PI != PE; ++PI) {
+        PHITransAddr T = A.second;
+        if (T.PHITranslateValue(A.first, *PI, /* DT */ 0)) {
+          DEBUG(dbgs() << "H2S: Terminating backward search from free " <<
+                          *I->first << " at untranslatable PHI " <<
+                          *T.getAddr() << " in basic block " <<
+                          A.first->getName() << " to basic block " <<
+                          (*PI)->getName() << "\n");
+          AmbiguousFrees.insert(I->first);
+          WorkQueue.clear();
+          break;
+        }
+
+        WorkQueue.push_back(BBAndAddr(*PI, T));
+      }
+    }
+  }
+}
+
+bool HeapToStack::runOnFunction(Function &F) {
+  AA = &getAnalysis<AliasAnalysis>();
+  DL = getAnalysisIfAvailable<DataLayout>();
+  DT = &getAnalysis<DominatorTree>();
+  TLI = &getAnalysis<TargetLibraryInfo>();
+
+  bool Changed = false;
+
+  if (!DL) {
+    DEBUG(dbgs() << "H2S: data layout not available; aborting!\n");
+    return Changed;
+  }
+
+  // Find all malloc allocations that are freed on all paths out of the
+  // function.
+
+  // Collect all possibly-relevant malloc/free calls.
+  SmallSet<Instruction *, 16> PossibleMallocs;
+  unsigned FuncFreeCnt = 0;
+  collectPossibleMallocs(F, PossibleMallocs, FuncFreeCnt);
+
+  if (PossibleMallocs.empty() || FuncFreeCnt == 0)
+    return Changed;
+
+  SmallSet<Instruction *, 16> VerifiedMallocs;
+  DenseMap<Instruction *, SmallSet<Instruction *, 4> > FreeMallocs;
+  SmallSet<BasicBlock *, 32> RelevantBlocks;
+  verifyPossibleMallocs(PossibleMallocs, VerifiedMallocs, FreeMallocs,
+                        RelevantBlocks);
+
+  if (VerifiedMallocs.empty())
+    return Changed;
+
+  // Construct a set of frees used by the promotable mallocs found above that
+  // might also free other allocations.
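+  // For example (cf. @foo3 in the included test): if the operand of a free
+  // is a PHI of a promotable malloc and an incoming function argument, the
+  // free must execute on the argument path but be skipped on the promoted
+  // path.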
+  SmallSet<Instruction *, 8> AmbiguousFrees;
+  determineAmbiguousFrees(VerifiedMallocs, FreeMallocs, AmbiguousFrees);
+
+  SmallSet<CallInst *, 16> TailCalls;
+  collectAliasingTailCalls(RelevantBlocks, VerifiedMallocs, TailCalls);
+
+  Changed = true;
+
+  // Calls marked for tail-call optimization that alias with the malloc
+  // return value cannot remain marked as tail calls after the modification
+  // (because alias analysis assumes that tail calls don't modify local stack
+  // allocations). This should not affect any calls actually in tail-call
+  // position (because there must be a free after the call).
+  for (SmallSet<CallInst *, 16>::iterator I = TailCalls.begin(),
+       IE = TailCalls.end(); I != IE; ++I) {
+    (*I)->setTailCall(false);
+
+    DEBUG(dbgs() << "H2S: Removing tail-call marking from " << **I << "\n");
+  }
+
+  LLVMContext &Context = F.getContext();
+  IRBuilder<> Builder(Context);
+
+  // Each ambiguous free needs a boolean flag to indicate whether or not it
+  // should actually free its memory.
+  for (SmallSet<Instruction *, 8>::iterator I = AmbiguousFrees.begin(),
+       IE = AmbiguousFrees.end(); I != IE; ++I) {
+    SSAUpdater PHIInserter;
+    PHIInserter.Initialize(Builder.getInt1Ty(), "free");
+
+    PHIInserter.AddAvailableValue(&F.getEntryBlock(), Builder.getTrue());
+    for (SmallSet<Instruction *, 4>::iterator J = FreeMallocs[*I].begin(),
+         JE = FreeMallocs[*I].end(); J != JE; ++J)
+      PHIInserter.AddAvailableValue((*J)->getParent(), Builder.getFalse());
+
+    CallInst *CI = cast<CallInst>(*I);
+    Value *Addr = CI->getArgOperand(0);
+    Value *Cmp = PHIInserter.GetValueInMiddleOfBlock(CI->getParent());
+
+    // FIXME: Some of the code below assumes that CI is not the last
+    // instruction in the block. When we handle invokes, this will need
+    // enhancement.
+    if (NullFree) {
+      // Passing a NULL pointer to free is legal (and ignored), so the most
+      // efficient thing to do is just to NULL the pointer if we should not
+      // free it.
+      Builder.SetInsertPoint(CI);
+      Value *S =
+        Builder.CreateSelect(Cmp,
+                             Addr, Constant::getNullValue(Addr->getType()));
+      CI->setArgOperand(0, S);
+      Builder.SetInsertPoint(CI->getNextNode());
+      Builder.CreateLifetimeEnd(Addr);
+    } else {
+      Instruction *T = SplitBlockAndInsertIfThenAt(CI, Cmp, false);
+      Builder.SetInsertPoint(CI);
+      Builder.CreateLifetimeEnd(Addr);
+      CI->moveBefore(T);
+    }
+  }
+
+  // Each non-ambiguous free should be removed.
+  for (DenseMap<Instruction *, SmallSet<Instruction *, 4> >::iterator I =
+       FreeMallocs.begin(), IE = FreeMallocs.end(); I != IE; ++I) {
+    if (AmbiguousFrees.count(I->first))
+      continue;
+
+    DEBUG(dbgs() << "H2S: Removing free " << *I->first << "\n");
+
+    Builder.SetInsertPoint(I->first);
+    Builder.CreateLifetimeEnd(cast<CallInst>(I->first)->getArgOperand(0));
+
+    I->first->eraseFromParent();
+    ++NumFreeDeleted;
+  }
+
+  // Now replace all chosen mallocs with allocas.
+  for (SmallSet<Instruction *, 16>::iterator I = VerifiedMallocs.begin(),
+       IE = VerifiedMallocs.end(); I != IE; ++I) {
+    CallInst *CI = cast<CallInst>(*I);
+
+    Type *ATy = getMallocAllocatedType(CI, TLI);
+    Value *AS = getMallocArraySize(CI, DL, TLI,
+                                   /* LookThroughSExt */ true);
+    bool IsConst = isa<ConstantInt>(AS);
+
+    DEBUG(dbgs() << "H2S: Replacing malloc " << **I << "\n");
+
+    // If this is a constant-sized allocation, then we can insert the
+    // allocation in the entry block.
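+    // A dynamically-sized allocation must instead stay at the malloc's
+    // position, where its size operand is known to be available.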
+    if (IsConst)
+      Builder.SetInsertPoint(F.getEntryBlock().getFirstInsertionPt());
+    else
+      Builder.SetInsertPoint(CI);
+    Value *NewAlloca = Builder.CreateBitCast(
+      Builder.CreateAlignedAlloca(ATy, DL->naturalStackAlignment(), AS),
+      CI->getType());
+    if (IsConst)
+      Builder.SetInsertPoint(CI);
+    Builder.CreateLifetimeStart(NewAlloca);
+
+    NewAlloca->takeName(CI);
+    CI->replaceAllUsesWith(NewAlloca);
+    CI->eraseFromParent();
+
+    DEBUG(dbgs() << "H2S: Replaced malloc with " <<
+                    *NewAlloca->stripPointerCasts() << "\n");
+
+    ++NumMallocToAlloca;
+  }
+
+  return Changed;
+}
+
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -36,6 +36,7 @@
   initializeDSEPass(Registry);
   initializeGVNPass(Registry);
   initializeEarlyCSEPass(Registry);
+  initializeHeapToStackPass(Registry);
   initializeIndVarSimplifyPass(Registry);
   initializeJumpThreadingPass(Registry);
   initializeLICMPass(Registry);
@@ -180,6 +181,10 @@
   unwrap(PM)->add(createEarlyCSEPass());
 }
 
+void LLVMAddHeapToStackPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createHeapToStackPass());
+}
+
 void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
 }
Index: lib/Transforms/Utils/BasicBlockUtils.cpp
===================================================================
--- lib/Transforms/Utils/BasicBlockUtils.cpp
+++ lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -649,6 +649,12 @@
 TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
     bool Unreachable, MDNode *BranchWeights) {
   Instruction *SplitBefore = Cmp->getNextNode();
+  return SplitBlockAndInsertIfThenAt(SplitBefore, Cmp, Unreachable,
+                                     BranchWeights);
+}
+
+TerminatorInst *llvm::SplitBlockAndInsertIfThenAt(Instruction *SplitBefore,
+    Value *Cmp, bool Unreachable, MDNode *BranchWeights) {
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
   TerminatorInst *HeadOldTerm = Head->getTerminator();
Index: test/Transforms/HeapToStack/basic.ll
===================================================================
--- /dev/null
+++ test/Transforms/HeapToStack/basic.ll
@@ -0,0 +1,147 @@
+; RUN: opt < %s -heaptostack -S | FileCheck %s
+; RUN: opt < %s -heaptostack -heap-to-stack-null-free -S | FileCheck -check-prefix=CHECK-NF %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; void bar(int *x);
+; void foo1() {
+;   int *x = malloc(16);
+;   bar(x);
+;   free(x);
+; }
+
+define void @foo1() #0 {
+entry:
+  %call = tail call noalias i8* @malloc(i64 16) #1
+  %0 = bitcast i8* %call to i32*
+  tail call void @bar(i32* %0) #1
+  tail call void @free(i8* %call) #1
+  ret void
+
+; CHECK-LABEL: @foo1
+; CHECK: alloca i32, i64 4, align 16
+; CHECK-NOT: @malloc
+; CHECK-NOT: tail call
+; CHECK: call void @bar
+; CHECK-NOT: @free
+; CHECK: ret
+}
+
+declare noalias i8* @malloc(i64) #1
+
+declare void @bar(i32*)
+
+declare void @free(i8* nocapture) #1
+
+; void foo2(int a) {
+;   int *x;
+;
+;   if (a == 0)
+;     x = malloc(16);
+;   else
+;     x = malloc(32);
+;
+;   bar(x);
+;   free(x);
+; }
+
+define void @foo2(i32 %a) #0 {
+entry:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call = tail call noalias i8* @malloc(i64 16) #1
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %call1 = tail call noalias i8* @malloc(i64 32) #1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %x.0.in = phi i8* [ %call, %if.then ], [ %call1, %if.else ]
+  %x.0 = bitcast i8* %x.0.in to i32*
+  tail call void @bar(i32* %x.0) #1
+  tail call void @free(i8* %x.0.in) #1
+  ret void
+
+; CHECK-LABEL: @foo2
+; CHECK-NOT: @malloc
+; CHECK-DAG: alloca i8, i64 16, align 16
+; CHECK-DAG: alloca i8, i64 32, align 16
+; CHECK-NOT: @free
+; CHECK: ret
+}
+
+; void foo3(int *y) {
+;   if (y == 0)
+;     y = malloc(48);
+;
+;   bar(y);
+;   free(y);
+; }
+
+define void @foo3(i32* %y) #0 {
+entry:
+  %cmp = icmp eq i32* %y, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = tail call noalias i8* @malloc(i64 48) #1
+  %0 = bitcast i8* %call to i32*
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %y.addr.0 = phi i32* [ %0, %if.then ], [ %y, %entry ]
+  tail call void @bar(i32* %y.addr.0) #1
+  %1 = bitcast i32* %y.addr.0 to i8*
+  tail call void @free(i8* %1) #1
+  ret void
+
+; CHECK-NF-LABEL: @foo3
+; CHECK-NF-NOT: @malloc
+
+; CHECK-NF: %free = phi i1 [ false, %if.then ], [ true, %entry ]
+; CHECK-NF: %[[PTR:[0-9]+]] = select i1 %free, i8* %{{[0-9]+}}, i8* null
+; CHECK-NF: tail call void @free(i8* %[[PTR]])
+
+; CHECK-NF: ret
+
+; CHECK-LABEL: @foo3
+; CHECK-NOT: @malloc
+
+; CHECK: %free = phi i1 [ false, %if.then ], [ true, %entry ]
+; CHECK: br i1 %free, label
+; CHECK: tail call void @free(i8* %{{[0-9]+}})
+; CHECK-NEXT: br
+
+; CHECK: ret
+}
+
+; void foo4(unsigned char n) {
+;   int *x = malloc(n*sizeof(int));
+;   bar(x);
+;   free(x);
+; }
+
+define void @foo4(i8 zeroext %n) #0 {
+entry:
+  %conv = zext i8 %n to i64
+  %mul = shl nuw nsw i64 %conv, 2
+  %call = call noalias i8* @malloc(i64 %mul) #1
+  %0 = bitcast i8* %call to i32*
+  call void @bar(i32* %0) #1
+  call void @free(i8* %call) #1
+  ret void
+
+; CHECK-LABEL: @foo4
+; CHECK: alloca i32, i64 %conv, align 16
+; CHECK-NOT: @malloc
+; CHECK-NOT: tail call
+; CHECK: call void @bar
+; CHECK-NOT: @free
+; CHECK: ret
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
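
A note for anyone who wants to try the pass outside the default pipelines: it is exposed as -heaptostack through opt (as in the test above), through the C API via the new LLVMAddHeapToStackPass entry point, and through PassManagerBuilder (controlled by the -heap-to-stack-promotion flag). A minimal stand-alone driver might look like the following sketch; the input file name "input.ll" is illustrative, and everything except createHeapToStackPass is stock LLVM API of this era:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IRReader/IRReader.h"
  #include "llvm/PassManager.h"
  #include "llvm/Support/SourceMgr.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Scalar.h"

  using namespace llvm;

  int main() {
    LLVMContext Context;
    SMDiagnostic Err;
    // Hypothetical input; any module with matched malloc/free pairs will do.
    Module *M = ParseIRFile("input.ll", Err, Context);
    if (!M) {
      Err.print("h2s-demo", errs());
      return 1;
    }

    PassManager PM;
    PM.add(new DataLayout(M));        // H2S bails out without a DataLayout
    PM.add(createHeapToStackPass());  // the pass added by this patch
    PM.run(*M);

    M->print(outs(), 0);              // promoted mallocs now appear as allocas
    return 0;
  }

The required TargetLibraryInfo, DominatorTree, and AliasAnalysis analyses are scheduled automatically by the legacy PassManager.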