Index: llvm/include/llvm/Analysis/LoopAllocationInfo.h =================================================================== --- /dev/null +++ llvm/include/llvm/Analysis/LoopAllocationInfo.h @@ -0,0 +1,154 @@ +//===- LoopAllocationInfo.h - memory allocations inside loops ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPALLOCATIONINFO_H +#define LLVM_ANALYSIS_LOOPALLOCATIONINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class BasicBlock; +class CallInst; +class DominatorTree; +class Loop; +class LoopInfo; +class TargetLibraryInfo; + +/// Track %ptr = allocfn to one or more freefn(%ptr) relationships within the +/// loop. +/// +/// This analysis stores information about allocation and deallocation calls +/// where the pointer is only live within the loop. Its design is tightly +/// coupled to the internal implementation of LICM, but is exposed in a public +/// API because the llvm::hoistRegion and llvm::sinkRegion functions in LICM are +/// also exposed. +/// +/// An allocation and its corresponding deallocations must be both hoisted and +/// sunk, or neither. LICM performs sinking first, then hoisting. As sinking and +/// hoisting is performed, more instructions will appear to be loop invariant, +/// creating more hoisting opportunities. For this reason we split the analysis +/// into two pieces: +/// 1. for a given pointer that is only live in the loop, retain a list its +/// matching deallocating instructions. +/// 2. during sinking, if we would sink the deallocation, don't, but inform +/// the LoopAllocationInfo. If all deallocations would have been sunk, and +/// hoistSink would hoist the allocation, perform both the hoist and sink +/// the various deallocation instructions. +/// +/// There's two subtleties here. First, we don't need guaranteed execution of +/// any particular deallocation function during unwinding. Before this +/// transformation, an unwind mid-loop would leak one pointer that was allocated +/// at the start of the loop. After this transformation, it would leak the one +/// pointer allocated before the loop began. There is no difference visible to +/// the caller. +/// +/// Second, we don't need to identify every deallocation in order for the +/// transformation to be safe. We need to know that all backedges (technically, +/// all edges back to the header block, which may be a superset of backedges in +/// the event of unnatural loop cycles) are only reached by passing through some +/// deallocation. Those deallocations are guaranteed to be sunk if/when we hoist +/// the allocation. Other functions may also happen to free the pointer, but we +/// would have no way of knowing that. If the function frees and we go back to +/// the header block, the code has undefined behaviour due to a double-free. For +/// it to have well-defined behaviour after freeing, control must pass to a loop +/// exit block. +/// +/// Every exit block is categorized into blocks where the pointer is certainly +/// freed by one of the deallocations we've already identified, or is certainly +/// not freed by any of the deallocations we've identified. In the former case +/// we will sink a free into that exit block. An opaque function that +/// deallocates fits into the latter category, and we won't insert a free call +/// in the block. In the remaining case where there are multiple paths to an +/// exit block some of which free the pointer and others that don't pass through +/// an identified free, we abort the transform. +/// +/// We also have to ensure that the maximum amount of allocated memory is not +/// raised by this transformation. To ensure this, we stop looking for +/// allocations after seeing an opaque function call. For example: +/// loop_top: +/// %ptr1 = call i8* @malloc(i32 %loop_invariant1) ;; candidate +/// %ptr2 = call i8* @malloc(i32 %loop_variant2) ;; not candidate +/// %ptr3 = call i8* @malloc(i32 %loop_invariant3) ;; candidate +/// call void @opaque() ;; stop scanning +/// %ptr5 = call i8* @malloc(i32 %loop_invariant5) ;; not candidate +/// +/// The same idea applies in reverse order to @free calls at the loop exits: +/// loop_exiting: +/// call void @free(%ptr1) ;; not candidate +/// call void @opaque() ;; stop scanning +/// call void @free(%ptr2) ;; %ptr2 is candidate +/// ;; start here and scan upwards +class LoopAllocationInfo { +public: + void analyzeLoop(LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, + Loop *CurLoop); + + // LICM has determined that this deallocation call may be sunk. + void addSafeToSink(const CallInst *CI) { + if (DeallocationToAllocation.count(CI)) + SafeToSink.insert(CI); + } + + // Given an allocation call, return true iff all the safety conditions met to + // hoist this allocation and sink its deallocations. + bool mayHoist(const CallInst *CI) const; + + // Retrieve the list of free calls in the loop matching a given allocation. + const SmallVector &FreesForMalloc(CallInst *CI) const { + assert(Entries.count(CI)); + return Entries.find(CI)->second.Deallocations; + } + + // Retrieve the list of loop exit blocks that are only entered after the + // allocation has been deallocated. All other loop exit blocks are reached + // with the allocation still allocated. + const SmallVector & + ExitBlocksWithPointerFreed(CallInst *CI) const { + assert(Entries.count(CI)); + return Entries.find(CI)->second.ExitBlocksToAddDeallocationsTo; + } + + // Returns whether the loop has any potentially-hoistable allocations. + bool empty() const { return Entries.empty(); } + +private: + struct AllocationInfoEntry { + AllocationInfoEntry( + SmallVector &&Deallocations, + SmallVector &&ExitBlocksToAddDeallocationsTo) + : Deallocations(Deallocations), + ExitBlocksToAddDeallocationsTo(ExitBlocksToAddDeallocationsTo) {} + + // Instructions that deallocate this pointer, but may or may not be + // executed. + SmallVector Deallocations; + + // Loop exit blocks which can only be entered with the pointer already + // freed. + SmallVector ExitBlocksToAddDeallocationsTo; + }; + + // Store deallocation info for a given allocation. Keyed on the allocation + // call. + DenseMap Entries; + + // Look up allocation matching a given deallocation. + DenseMap DeallocationToAllocation; + + // Deallocations that LICM has determined are safe to sink. They can be sunk + // only if the whole collection of Deallocations in an entry are sunk and its + // allocation are matching is hoisted. + DenseSet SafeToSink; +}; + +} // namespace llvm + +#endif Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -39,6 +39,7 @@ class BasicBlock; class DataLayout; class Loop; +class LoopAllocationInfo; class LoopInfo; class MemoryAccess; class MemorySSAUpdater; @@ -119,7 +120,8 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + LoopAllocationInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -132,7 +134,8 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + LoopAllocationInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. Index: llvm/lib/Analysis/CMakeLists.txt =================================================================== --- llvm/lib/Analysis/CMakeLists.txt +++ llvm/lib/Analysis/CMakeLists.txt @@ -50,6 +50,7 @@ Lint.cpp Loads.cpp LoopAccessAnalysis.cpp + LoopAllocationInfo.cpp LoopAnalysisManager.cpp LoopUnrollAnalyzer.cpp LoopInfo.cpp Index: llvm/lib/Analysis/LoopAllocationInfo.cpp =================================================================== --- /dev/null +++ llvm/lib/Analysis/LoopAllocationInfo.cpp @@ -0,0 +1,247 @@ +//===- LoopAllocationInfo.cpp - memory allocations inside loops -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopAllocationInfo.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Casting.h" + +namespace llvm { + +/// Track ptr = allocfn to one or more freefn(ptr) relationships within the +/// loop. +/// +/// This analysis stores information about allocation and deallocation calls +/// where the pointer is only live within the loop. +/// +/// An allocation must be both hoisted and sunk, or neither. LICM performs +/// sinking first, then hoisting. As sinking and hoisting is performed, more +/// instructions will appear to be loop invariant, making more opportunities. +/// For this reason we split the analysis into two pieces: +/// 1. for a given pointer that is only live in the loop, retain a list its +/// matching deallocating instructions. +/// 2. during sinking, if we would sink the deallocation, don't, but inform +/// the LoopAllocationInfo. If all deallocations would have been sunk, and +/// hoistSink would hoist the allocation, perform both the hoist and sink +/// the various deallocation instructions. +/// +/// We don't need guaranteed execution of any deallocation function during +/// unwinding. Before this transformation, an unwind mid-loop would leak one +/// pointer that was allocated at the start of the loop. After this +/// transformation, it would leak the one pointer allocated before the loop +/// began. There is no difference visible to the caller. +/// +/// We do have to ensure that the maximum amount of allocated memory is not +/// raised by this transformation. To ensure this, we stop looking for +/// allocations after seeing an opaque function call. For example: +/// loop_top: +/// %ptr1 = call i8* @malloc(i32 %loop_invariant1) ;; candidate +/// %ptr2 = call i8* @malloc(i32 %loop_variant2) ;; not candidate +/// %ptr3 = call i8* @malloc(i32 %loop_invariant3) ;; candidate +/// call void @opaque() ;; stop scanning +/// %ptr5 = call i8* @malloc(i32 %loop_invariant5) ;; not candidate +/// +/// The same idea applies in reverse order to @free calls at the loop exits: +/// loop_exiting: +/// call void @free(%ptr1) ;; not candidate +/// call void @opaque() ;; stop scanning +/// call void @free(%ptr2) ;; %ptr2 is candidate +/// ;; start here and scan upwards +void LoopAllocationInfo::analyzeLoop(LoopInfo *LI, DominatorTree *DT, + TargetLibraryInfo *TLI, Loop *CurLoop) { + assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && + "Unexpected input to LoopAllocationInfo::analyzeLoop."); + if (!TLI) + return; + + SmallVector LatchBlocks; + CurLoop->getLoopLatches(LatchBlocks); + if (LatchBlocks.empty()) + return; + + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.empty()) + return; + + // Unlike hoistRegion, we only scan the header block. If the allocation is in + // a later block, it must be behind some sort of branch which implies that + // it's conditional on something or else another optimization would've removed + // the unconditional branch earlier). + BasicBlock *HeaderBB = CurLoop->getHeader(); + SmallPtrSet HeaderBlock; + HeaderBlock.insert(HeaderBB); + for (BasicBlock::iterator I = HeaderBB->begin(), E = HeaderBB->end(); I != E; + ++I) { + // Find a block of allocations that are guaranteed to execute, ignoring the + // fact that allocation functions are themselves able to exit the program or + // unwind. + + // There are no allocation functions which are guaranteed to continue to + // execute the next instruction. + if (isGuaranteedToTransferExecutionToSuccessor(&*I)) { + // These two intrinsics and increase and decrease heap size respectively. + // Don't hoist over an allocating intrinsic in order to ensure that any + // out-of-memory conditions will occur on the same instruction, in case + // there are differences in how the instructions handle OOM. Don't hoist + // over a deallocating intrinsic to avoid increasing the maximum heap + // allocated. + if (match(&*I, PatternMatch::m_Intrinsic< + Intrinsic::objc_autoreleasePoolPush>()) || + match( + &*I, + PatternMatch::m_Intrinsic())) + break; + + continue; + } + + if (!llvm::isAllocationFn(&*I, TLI)) + break; + auto CI = cast(I); + + // Find places in the loop where the pointer is certainly freed. The places + // we identify must free this allocation, but there may be any number of + // other places that could free our pointer which we miss. + // + // We use this to ensure that the allocation is freed before we go around + // any loop latch. + SmallVector FreeCalls; + constexpr size_t UseVisitThreshold = 20; + SmallVector Worklist; + SmallPtrSet Visited; + auto Enqueue = [&](const Instruction *I) { + for (const Use &U : I->uses()) { + Instruction *User = cast(U.getUser()); + if (!CurLoop->contains(User)) + continue; + if (!Visited.insert(User).second) + continue; + if (Visited.size() == UseVisitThreshold) + return; + Worklist.push_back(User); + } + }; + Enqueue(CI); + do { + Instruction *I = Worklist.pop_back_val(); + if (isa(I)) { + Enqueue(I); + } else if (auto *GEP = dyn_cast(I)) { + if (GEP->hasAllZeroIndices()) + Enqueue(GEP); + } else if (auto *PN = dyn_cast(I)) { + if (PN->hasConstantOrUndefValue()) + Enqueue(I); + } else if (llvm::isFreeCall(I, TLI)) { + FreeCalls.push_back(cast(I)); + } + } while (!Worklist.empty() && Visited.size() < UseVisitThreshold); + + if (FreeCalls.empty()) + continue; + + SmallPtrSet FreeCallBBs; + for (auto FreeCall : FreeCalls) + FreeCallBBs.insert(FreeCall->getParent()); + + bool DoNotTransform = false; + + // Check that the frees we found cover all the latches. + for (BasicBlock *LatchBB : LatchBlocks) { + if (FreeCallBBs.count(LatchBB)) + continue; + SmallVector HeaderWorklist; + HeaderWorklist.push_back(HeaderBB); + if (llvm::isPotentiallyReachableFromMany(HeaderWorklist, LatchBB, + &FreeCallBBs, DT, LI)) { + DoNotTransform = true; + break; + } + } + if (DoNotTransform) + continue; + + // Determine whether each exit block is reached with the pointer freed or + // not. If it can be reached with the pointer conditionally freed, we abort + // the transform. + SmallVector InsertDeallocation; + for (auto ExitBB : ExitBlocks) { + SmallVector HeaderWorklist, FreeCallWorklist; + HeaderWorklist.push_back(HeaderBB); + FreeCallWorklist.insert(FreeCallWorklist.end(), FreeCallBBs.begin(), + FreeCallBBs.end()); + // Starting at the header, is there a path to ExitBB without passing + // through any frees? + bool ReachExitBBWithoutFree = llvm::isPotentiallyReachableFromMany( + HeaderWorklist, ExitBB, &FreeCallBBs, DT, LI); + // Starting from each of the frees, is there a path to ExitBB without + // passing back through the header, or is this a single-block loop where + // the malloc and free are in both in the header? + bool ReachExitBBWithFree = + FreeCallBBs.count(HeaderBB) || + llvm::isPotentiallyReachableFromMany(FreeCallWorklist, ExitBB, + &HeaderBlock, DT, LI); + assert((ReachExitBBWithoutFree || ReachExitBBWithFree) && + "loop exit blocks not reachable from loop"); + if (ReachExitBBWithoutFree && ReachExitBBWithFree) { + // These exist paths to this exit BB where the allocation is freed and + // not freed. + DoNotTransform = true; + break; + } + if (ReachExitBBWithFree) { + // Some blocks, such as those with a catchswitch instruction, can't + // have a free call inserted in them. Give up transforming the + // allocation in that case. + if (ExitBB->getFirstInsertionPt() == ExitBB->end()) { + DoNotTransform = true; + break; + } + // We'll need to insert a free-call into this exit BB if we hoist the + // allocation out of the loop. + InsertDeallocation.push_back(ExitBB); + } + } + if (DoNotTransform) + continue; + + for (const CallInst *FreeCall : FreeCalls) + DeallocationToAllocation[FreeCall] = CI; + Entries.try_emplace(CI, std::move(FreeCalls), + std::move(InsertDeallocation)); + } +} + +// Given an allocation call, are all the safety conditions met to hoist this +// allocation and sink its deallocations. +bool LoopAllocationInfo::mayHoist(const CallInst *CI) const { + auto it = Entries.find(CI); + if (it == Entries.end()) + return false; + for (const CallInst *FreeCall : it->second.Deallocations) { + if (!SafeToSink.count(FreeCall)) + return false; + } + return true; +} + +} // namespace llvm Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -40,6 +40,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/LoopAllocationInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -57,6 +58,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -356,6 +358,11 @@ ICFLoopSafetyInfo SafetyInfo(DT); SafetyInfo.computeLoopSafetyInfo(L); + // Scan for matching malloc/free pairs. + LoopAllocationInfo LAI; + if (L->hasDedicatedExits() && Preheader) + LAI.analyzeLoop(LI, DT, TLI, L); + // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of @@ -369,10 +376,12 @@ LicmMssaOptCap, LicmMssaNoAccForPromotionCap}; if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, &LAI, Flags, + ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, &LAI, Flags, + ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -476,13 +485,13 @@ DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, + ICFLoopSafetyInfo *SafetyInfo, LoopAllocationInfo *LAI, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && LAI != nullptr && "Unexpected input to sinkRegion."); assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized."); @@ -500,6 +509,8 @@ if (inSubLoop(BB, CurLoop, LI)) continue; + bool ScanForFrees = !LAI->empty() && !isa(BB->getTerminator()); + for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { Instruction &I = *--II; @@ -532,6 +543,19 @@ Changed = true; } } + + if (ScanForFrees) { + if (llvm::isFreeCall(&I, TLI)) { + LAI->addSafeToSink(cast(&I)); + } else if (isa(I) && + (!isa(I) || + match(&I, PatternMatch::m_Intrinsic< + Intrinsic::objc_autoreleasePoolPush>()))) { + // This call might allocate memory. Don't move frees which are before + // it to after it. + ScanForFrees = false; + } + } } } if (MSSAU && VerifyMemorySSA) @@ -776,12 +800,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, + ICFLoopSafetyInfo *SafetyInfo, LoopAllocationInfo *LAI, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && LAI != nullptr && "Unexpected input to hoistRegion."); assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized."); @@ -842,6 +866,50 @@ continue; } + // If it's a malloc/free pair, try hoisting it out to the preheader. We + // don't need to worry about intervening unwinding function calls in this + // case, so we can be more aggressive than for generic instructions. + if (CurLoop->hasLoopInvariantOperands(&I) && + llvm::isAllocationFn(&I, TLI) && LAI->mayHoist(cast(&I))) { + IRBuilder<>(&I).CreateLifetimeStart(&I); + hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, + MSSAU, ORE); + HoistedInstructions.push_back(&I); + auto CI = cast(&I); + + // One deallocation will be cloned into all the necessary exit blocks + // then the deallocations will be deleted. + CallInst *Pattern = LAI->FreesForMalloc(CI)[0]; + Pattern->dropUnknownNonDebugMetadata(); + + // In case it is freeing a copy of the pointer not available in all + // exits of the loop. + Pattern->setArgOperand(0, CI); + + // Merge debug locations. We don't track which frees have paths to which + // exit blocks, so we merge all the debug info to put on all the frees + // being placed in the exit blocks. + for (auto FreeCall : LAI->FreesForMalloc(CI)) { + Pattern->applyMergedLocation(Pattern->getDebugLoc(), + FreeCall->getDebugLoc()); + } + + // Insert frees into the loop exit blocks where we know the pointer + // would have been freed. + for (auto ExitBlock : LAI->ExitBlocksWithPointerFreed(CI)) { + Pattern->clone()->insertBefore(&*ExitBlock->getFirstInsertionPt()); + } + + // Replace the original frees with a @lifetime.end marker. + for (auto FreeCall : LAI->FreesForMalloc(CI)) { + IRBuilder<>(FreeCall).CreateLifetimeEnd(&I); + eraseInstruction(*FreeCall, *SafetyInfo, CurAST, MSSAU); + } + + Changed = true; + continue; + } + // Attempt to remove floating point division out of the loop by // converting it to a reciprocal multiplication. if (I.getOpcode() == Instruction::FDiv && @@ -2228,7 +2296,7 @@ for (BasicBlock *BB : CurLoop->getBlocks()) for (Instruction &I : *BB) { if (N >= LICMN2Theshold) { - LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for " + LLVM_DEBUG(dbgs() << "Aliasing N2 threshold exhausted for " << *(MemLoc.Ptr) << "\n"); return true; } Index: llvm/test/Transforms/LICM/allocs.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/allocs.ll @@ -0,0 +1,750 @@ +; RUN: opt -S -licm < %s | FileCheck %s +; RUN: opt -S -enable-mssa-loop-dependency -licm < %s | FileCheck %s + +declare i8* @malloc(i32) nounwind +declare void @free(i8* nocapture) nounwind + +declare void @use(i8*) +declare i1 @expr() + +define void @test1(i32 %loop_invariant) { +; CHECK-LABEL: @test1 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc +; CHECK: @use +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +declare void @mightmalloc() + +define void @test2(i32 %loop_invariant) { +; CHECK-LABEL: @test2 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free +; CHECK: @mightmalloc + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + call void @mightmalloc() + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: @free + ret void +} + +define void @test3(i32 %loop_invariant) { +; CHECK-LABEL: @test3 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @mightmalloc +; CHECK: @malloc +; CHECK: @use +; CHECK: @free + call void @mightmalloc() + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: @free + ret void +} + +define void @test4(i32 %loop_invariant) { +; CHECK-LABEL: @test4 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: @malloc +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK: @llvm.lifetime.start +; CHECK: @llvm.lifetime.start +; CHECK: @use +; CHECK: @use +; CHECK: @use +; CHECK: @llvm.lifetime.end +; CHECK: @llvm.lifetime.end +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + %ptr1 = call i8* @malloc(i32 %loop_invariant) + %ptr2 = call i8* @malloc(i32 %loop_invariant) + %ptr3 = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr1) + call void @use(i8* %ptr2) + call void @use(i8* %ptr3) + call void @free(i8* %ptr1) + call void @free(i8* %ptr2) + call void @free(i8* %ptr3) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free +; CHECK: @free +; CHECK: @free + ret void +} + +define void @test5(i32 %loop_invariant) { +; CHECK-LABEL: @test5 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: @malloc +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK: @llvm.lifetime.start +; CHECK: @llvm.lifetime.start +; CHECK: @use +; CHECK: @use +; CHECK: @use +; CHECK: @llvm.lifetime.end +; CHECK: @llvm.lifetime.end +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + %ptr3 = call i8* @malloc(i32 %loop_invariant) + %ptr2 = call i8* @malloc(i32 %loop_invariant) + %ptr1 = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr1) + call void @use(i8* %ptr2) + call void @use(i8* %ptr3) + call void @free(i8* %ptr1) + call void @free(i8* %ptr2) + call void @free(i8* %ptr3) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free +; CHECK: @free +; CHECK: @free + ret void +} + +define void @test6(i32 %loop_invariant) { +; CHECK-LABEL: @test6 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: @malloc +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @llvm.lifetime.start +; CHECK: @malloc +; CHECK: @llvm.lifetime.start +; CHECK: @use +; CHECK: @use +; CHECK: @use +; CHECK: @llvm.lifetime.end +; CHECK: @free +; CHECK: @llvm.lifetime.end + %i = phi i32 [%loop_invariant, %header], [%i.1, %loop] + %ptr1 = call i8* @malloc(i32 %loop_invariant) + %ptr2 = call i8* @malloc(i32 %i) + %ptr3 = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr1) + call void @use(i8* %ptr2) + call void @use(i8* %ptr3) + call void @free(i8* %ptr1) + call void @free(i8* %ptr2) + call void @free(i8* %ptr3) + %i.1 = add i32 %i, 1 + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free +; CHECK: @free +; CHECK-NOT: @free +; CHECK: ret void + ret void +} + +define void @test7(i32 %loop_invariant) { +; CHECK-LABEL: @test7 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc +; CHECK: @use +; CHECK-NOT: @free +; CHECK-NOT: @llvm.lifetime.end + %i = phi i32 [%loop_invariant, %header], [%i.1, %loop.2], [%i.1, %loop.3] + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + %i.1 = add i32 %i, 1 + %cmp = icmp eq i32 %i.1, 123 + br i1 %cmp, label %loop.2, label %loop.3 + +loop.2: +; CHECK-LABEL: loop.2: +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + %continue.1 = call i1 @expr() + call void @free(i8* %ptr) + br i1 %continue.1, label %loop, label %loopexit + +loop.3: +; CHECK-LABEL: loop.3: +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + %continue.2 = call i1 @expr() + call void @free(i8* %ptr) + br i1 %continue.2, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +declare void @assert_fail() + +define void @test8(i32 %loop_invariant) { +; CHECK-LABEL: @test8 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc + %ptr = call i8* @malloc(i32 %loop_invariant) + %fail = call i1 @expr() + br i1 %fail, label %assert_fail, label %assert_pass + +assert_fail: + call void @assert_fail() + unreachable + +assert_pass: +; CHECK-LABEL: assert_pass: +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + call void @free(i8* %ptr) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +declare void @usei64(i64) + +define void @test9(i32 %loop_invariant) { +; CHECK-LABEL: @test9 +header: +; CHECK-LABEL: header: +; CHECK: add +; CHECK: @malloc +; CHECK: ptrtoint +; CHECK: and +; CHECK-NOT: usei64 +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc +; CHECK: usei64 + %size = add i32 %loop_invariant, 1 + %ptr = call i8* @malloc(i32 %size) + %ptrbits = ptrtoint i8* %ptr to i64 + %aligncheck = and i64 %ptrbits, 4 + call void @usei64(i64%aligncheck) + %fail = call i1 @expr() + br i1 %fail, label %assert_fail, label %assert_pass + +assert_fail: + call void @assert_fail() + unreachable + +assert_pass: +; CHECK-LABEL: assert_pass: +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + call void @free(i8* %ptr) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +declare x86_stdcallcc void @_CxxThrowException(i8*, i8*) + +declare i32 @__CxxFrameHandler3(...) + +define void @test10(i32 %loop_invariant) personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: @test10 +header: + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + invoke void @_CxxThrowException(i8* null, i8* null) + to label %continue unwind label %catch.dispatch + +continue: + %take.backedge = call i1 @expr() + br i1 %take.backedge, label %loop, label %loopexit + +catch.dispatch: + %cs = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %cp = catchpad within %cs [i8* null, i32 64, i8* null] + catchret from %cp to label %loopexit + +loopexit: + ret void +} + +define void @test11(i32 %loop_invariant) personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: @test11 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc +; CHECK: @use + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + invoke void @_CxxThrowException(i8* null, i8* null) + to label %continue unwind label %catch.dispatch + +continue: +; CHECK_LABEL: continue: +; CHECK-NOT: free +; CHECK: lifetime.end +; CHECK-NOT: free + %take.backedge = call i1 @expr() + call void @free(i8* %ptr) + br i1 %take.backedge, label %loop, label %loopexit + +catch.dispatch: +; CHECK-LABEL: catch.dispatch: +; CHECK-NEXT: catchswitch + %cs = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %cp = catchpad within %cs [i8* null, i32 64, i8* null] + catchret from %cp to label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +declare float @llvm.log.f32(float %Val) + +define void @test12(i32 %loop_invariant) { +; CHECK-LABEL: @test1 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc +; CHECK: @use +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + call float @llvm.log.f32(float 0.0) + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call float @llvm.log.f32(float 0.0) + call void @free(i8* %ptr) + call float @llvm.log.f32(float 0.0) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +declare void @llvm.objc.autoreleasePoolPop(i8*) +declare i8* @llvm.objc.autoreleasePoolPush() + +define void @test13(i32 %loop_invariant) { +; CHECK-LABEL: @test13 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free + %managed_ptr = call i8* @llvm.objc.autoreleasePoolPush() + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: @free + ret void +} + +define void @test14(i32 %loop_invariant) { +; CHECK-LABEL: @test14 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + %managed_ptr = call i8* @llvm.objc.autoreleasePoolPush() + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: @free + ret void +} + +declare i8 @expr8() + +define void @test15(i32 %loop_invariant) { +; CHECK-LABEL: @test15 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + %Val = call i8 @expr8() + switch i8 %Val, label %b1 [ i8 2, label %b2 + i8 3, label %b3 ] + +b1: + call void @usei64(i64 1) + %a = call i1 @expr() + call void @free(i8* %ptr), !dbg !6 + br i1 %a, label %loop, label %loopexit + +b2: + call void @usei64(i64 2) + %b = call i1 @expr() + call void @free(i8* %ptr), !dbg !7 + br i1 %b, label %loop, label %loopexit + +b3: + call void @usei64(i64 3) + %c = call i1 @expr() + call void @free(i8* %ptr), !dbg !8 + br i1 %c, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free{{.*}}, !dbg [[test15MergedLoc:![0-9]+]] + ret void +} + +define void @test16(i32 %loop_invariant, i8* %vptr) { +; CHECK-LABEL: @test16 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free + %Val = load volatile i8, i8* %vptr + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: @free + ret void +} + +define void @test17(i32 %loop_invariant) { +; CHECK-LABEL: @test17 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: bitcast +; CHECK: getelementptr +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc + %ptr = call i8* @malloc(i32 %loop_invariant) + %ptr2 = bitcast i8* %ptr to i8* + %ptr3 = getelementptr i8, i8* %ptr2, i32 0 +br label %nextloopblock + +nextloopblock: + %ptr4 = phi i8* [%ptr3, %loop] + call void @free(i8* %ptr4) + br i1 undef, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK: @free + ret void +} + +define void @test18(i32 %loop_invariant) { +; CHECK-LABEL: @test18 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc + %ptr = call i8* @malloc(i32 %loop_invariant) + %continue = call i1 @expr() + %branch = call i1 @expr() + br i1 %branch, label %nextbb1, label %nextbb2 + +nextbb1: + %ptr2 = bitcast i8* %ptr to i8* + call void @free(i8* %ptr2) + br label %nextloopblock + +nextbb2: + call void @free(i8* %ptr) + br label %nextloopblock + +nextloopblock: + br i1 %continue, label %loop, label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: undef +; CHECK: @free + ret void +} + +declare void @might_malloc_or_throw() + +define void @test19(i32 %loop_invariant) personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: @test19 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free +; CHECK: @might_malloc_or_throw + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + invoke void @might_malloc_or_throw() to label %loop unwind label %loopexit + +loopexit: +; CHECK-LABEL: loopexit: +; CHECK-NOT: @free + %lpad = landingpad { i8*, i32 } catch i8* null + ret void +} + +define void @test20(i32 %loop_invariant) { +; CHECK-LABEL: @test20 +header: +; CHECK-LABEL: header: +; CHECK: @malloc +; CHECK: br label %loop.header + br label %loop.header + +loop.header: +; CHECK-LABEL: loop.header: +; CHECK-NOT: @malloc +; CHECK: @llvm.lifetime.start +; CHECK-NOT: @malloc +; CHECK: @use +; CHECK-NOT: @free +; CHECK: @llvm.lifetime.end +; CHECK-NOT: @free + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + br i1 undef, label %loop.header, label %loop2 + +loop2: + br i1 undef, label %loop.header, label %loop.exit + +loop.exit: +; CHECK-LABEL: loop.exit: +; CHECK: @free + ret void +} + +define void @test21(i32 %loop_invariant) { +; CHECK-LABEL: @test21 +header: +; CHECK-LABEL: header: +; CHECK-NOT: @malloc +; CHECK: br label %loop.header + br label %loop.header + +loop.header: +; CHECK-LABEL: loop.header: +; CHECK: @malloc +; CHECK: @use +; CHECK: @free + %ptr = call i8* @malloc(i32 %loop_invariant) + call void @use(i8* %ptr) + call void @free(i8* %ptr) + br i1 undef, label %loop.header, label %loop2 + +loop2: +; CHECK-LABEL: loop2: +; CHECK: call i1 @expr + ; This call after @free may increase total heap memory and therefore must + ; stay after the free call. + %A = call i1 @expr() + br i1 %A, label %loop.header, label %loop.exit + +loop.exit: +; CHECK-LABEL: loop.exit: +; CHECK-NOT: @free + ret void +} + +!llvm.module.flags = !{!0, !1, !2} +!llvm.dbg.cu = !{!3} + +; CHECK: [[test15MergedLoc]] = !DILocation(line: 0 +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i32 1, !"PIC Level", i32 2} +!3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4) +!4 = !DIFile(filename: "allocs.ll", directory: "test/Transforms/LICM") +!5 = distinct !DISubprogram(name: "test/Transforms/LICM/allocs.ll", unit: !3) +!6 = !DILocation(line: 1, column: 10, scope: !5) +!7 = !DILocation(line: 2, column: 20, scope: !5) +!8 = !DILocation(line: 3, column: 30, scope: !5)