diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -97,6 +97,7 @@ #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" @@ -151,6 +152,14 @@ return FuncRWInstsMap[&F]; } + /// Return TargetLibraryInfo for function \p F. + TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { + return FuncTLIMap[&F]; + } + + /// Return datalayout used in the module. + const DataLayout &getDL() { return DL; } + private: /// A map type from functions to opcode to instruction maps. using FuncInstOpcodeMapTy = DenseMap; @@ -158,6 +167,9 @@ /// A map type from functions to their read or write instructions. using FuncRWInstsMapTy = DenseMap; + /// A map type from functions to their TLI. + using FuncTLIMapTy = DenseMap; + /// A nested map that remembers all instructions in a function with a certain /// instruction opcode (Instruction::getOpcode()). FuncInstOpcodeMapTy FuncInstOpcodeMap; @@ -165,6 +177,9 @@ /// A map from functions to their instructions that may read or write memory. FuncRWInstsMapTy FuncRWInstsMap; + /// A map from functions to their TLI. + FuncTLIMapTy FuncTLIMap; + /// The datalayout used in the module. const DataLayout &DL; @@ -307,6 +322,7 @@ /// /// \param F The function that is checked for attribute opportunities. /// \param Whitelist If not null, a set limiting the attribute opportunities. + /// \param TLIGetter helper function to get TargetLibraryInfo Analysis result. /// /// Note that abstract attribute instances are generally created even if the /// IR already contains the information they would deduce. The most important @@ -314,7 +330,9 @@ /// instance, which can be queried without the need to look at the IR in /// various places. 
void identifyDefaultAbstractAttributes( - Function &F, DenseSet *Whitelist = nullptr); + Function &F, + std::function &TLIGetter, + DenseSet *Whitelist = nullptr); /// Check \p Pred on all function call sites. /// @@ -1121,6 +1139,26 @@ static const char ID; }; +struct AAHeapToStack : public StateWrapper, + public IRPosition { + IRPositionConstructorForward(AAHeapToStack, IRPosition); + + /// Returns true if HeapToStack conversion is assumed to be possible. + bool isAssumedHeapToStack() const { return getAssumed(); } + + /// Returns true if HeapToStack conversion is known to be possible. + bool isKnownHeapToStack() const { return getKnown(); } + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() { return *this; } + const IRPosition &getIRPosition() const { return *this; } + ///} + + static const char ID; +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -25,6 +25,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -109,6 +110,11 @@ cl::desc("Verify the Attributor deduction and " "manifestation of attributes -- may issue false-positive errors"), cl::init(false)); +static cl::opt EnableHeapToStack("enable-heap-to-stack-conversion", + cl::init(false), cl::Hidden); + +static cl::opt MaxHeapToStackSize("max-heap-to-stack-size", + cl::init(1024), cl::Hidden); /// Logic operators for the change status enum class. 
/// @@ -2051,6 +2057,299 @@ } }; +struct AAHeapToStackImpl : public AAHeapToStack { + IRPositionConstructorForward(AAHeapToStackImpl, AAHeapToStack); + + const std::string getAsStr() const override { + return "[H2S] Mallocs: " + std::to_string(MallocCalls.size()) + + " [H2S] Frees: " + std::to_string(FreeCalls.size()); + } + + void initialize(Attributor &A) override { + const Function &F = getAnchorScope(); + if (F.hasFnAttribute(Attribute::NoFree)) { + indicatePessimisticFixpoint(); + return; + } + + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(F); + + for (auto &BB : F) + for (auto &I : BB) { + if (isMallocLikeFn(&I, TLI)) + if (auto *CSize = dyn_cast(I.getOperand(0))) + if (CSize->getValue().sle(MaxHeapToStackSize)) { + MallocCalls.insert(const_cast(&I)); + LLVM_DEBUG(dbgs() << "H2S: Initial malloc call: " << I << "\n"); + } + + if (isFreeCall(&I, TLI)) { + FreeCalls.insert(const_cast(&I)); + LLVM_DEBUG(dbgs() << "H2S: Initial free call: " << I << "\n"); + } + } + + // No malloc calls, there is no work to be done. + if (MallocCalls.empty()) + indicatePessimisticFixpoint(); + + SmallVector BadFreeCalls; + // Check if at least one MallocCall has corresponding free, otherwise there + // is no work to be done. 
+ for (auto *FreeCall : FreeCalls) { + SmallVector FreeUOs; + GetUnderlyingObjects(FreeCall->getOperand(0), FreeUOs, + A.getInfoCache().getDL()); + + for (auto *UO : FreeUOs) { + if (!isa(UO) || + !MallocCalls.count(cast(UO))) { + BadFreeCalls.push_back(FreeCall); + LLVM_DEBUG(dbgs() << "H2S: Bad free call: " << *FreeCall + << " UO: " << *UO << "\n"); + break; + } + + FreesForMalloc[const_cast(cast(UO))] + .insert(FreeCall); + } + } + + for (auto *BadFreeCall : BadFreeCalls) + FreeCalls.erase(BadFreeCall); + + if (FreeCalls.empty()) + indicatePessimisticFixpoint(); + } + + ChangeStatus manifest(Attributor &A) override { + assert(getState().isValidState() && + "Attempted to manifest an invalid state!"); + + ChangeStatus HasChanged = ChangeStatus::UNCHANGED; + Function &F = getAnchorScope(); + + for (Instruction *MallocCall : MallocCalls) { + LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall + << "\n"); + + unsigned AS = cast(MallocCall->getType())->getAddressSpace(); + Instruction *AI = new AllocaInst(Type::getInt8Ty(F.getContext()), AS, + MallocCall->getOperand(0)); + F.begin()->getInstList().insert(F.begin()->begin(), AI); + + if (AI->getType() != MallocCall->getType()) { + auto *BC = new BitCastInst(AI, MallocCall->getType()); + BC->insertAfter(AI); + AI = BC; + } + + MallocCall->replaceAllUsesWith(AI); + + if (auto *II = dyn_cast(MallocCall)) { + auto *NBB = II->getNormalDest(); + BranchInst::Create(NBB, MallocCall->getParent()); + MallocCall->eraseFromParent(); + } else { + MallocCall->eraseFromParent(); + } + HasChanged = ChangeStatus::CHANGED; + } + + for (Instruction *FreeCall : FreeCalls) { + LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n"); + FreeCall->eraseFromParent(); + HasChanged = ChangeStatus::CHANGED; + } + + return HasChanged; + } + + SmallPtrSet MallocCalls, FreeCalls; + + /// A map for each malloc call to the set of associated free calls. 
+ DenseMap> FreesForMalloc; + + /// Check if \p MallocCall is freed or captured in BasicBlock starting from + /// \p I. + bool checkMallocCall(Attributor &A, Instruction *MallocCall, Instruction *I, + std::set &Ptrs, + const TargetLibraryInfo *TLI); + + /// Check if BB has unexpected exit. If pointer is not captured, it can still + /// be converted to stack allocation. + bool checkUnexpectedExit(Attributor &A, Instruction *MallocCall, BasicBlock *BB, + std::set &Ptrs); + + ChangeStatus updateImpl(Attributor &A) override; +}; + +bool AAHeapToStackImpl::checkMallocCall(Attributor &A, Instruction *MallocCall, + Instruction *I, + std::set &Ptrs, + const TargetLibraryInfo *TLI) { + auto *LivenessAA = A.getAAFor(*this, getAnchorScope()); + + do { + if (LivenessAA && LivenessAA->isAssumedDead(I)) + continue; + + if (isFreeCall(I, TLI) && Ptrs.count((Instruction *)I->getOperand(0))) { + LLVM_DEBUG(dbgs() << "H2S: Paired malloc call: " << *MallocCall + << " with free at: " << *I << "\n"); + return true; + } + + if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + if (!ICS.hasFnAttr(Attribute::NoSync)) + return false; + + auto *NoSyncAA = A.getAAFor(*this, *I); + + /// Function cannot synchronize. 
+ if (NoSyncAA && !NoSyncAA->isAssumedNoSync()) + return false; + } + + if (auto *GEPI = dyn_cast(I)) { + if (Ptrs.count((Instruction *)GEPI->getPointerOperand())) + Ptrs.insert(I); + } else if (I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::AddrSpaceCast) { + if (Ptrs.count((Instruction *)I->getOperand(0))) + Ptrs.insert(I); + } else if (auto CS = CallSite(&*I)) { + if (Ptrs.count((Instruction *)CS.getReturnedArgOperand())) + Ptrs.insert(I); + } else if (auto *SI = dyn_cast(I)) { + if (Ptrs.count((Instruction *)SI->getTrueValue()) || + Ptrs.count((Instruction *)SI->getFalseValue())) + Ptrs.insert(I); + } + } while ((I = I->getNextNode())); + + return false; +} + +bool AAHeapToStackImpl::checkUnexpectedExit(Attributor &A, Instruction *MallocCall, + BasicBlock *BB, + std::set &Ptrs) { + auto *LivenessAA = A.getAAFor(*this, getAnchorScope()); + + for (Instruction &I : *BB) { + if (&I == MallocCall) + continue; + + if (LivenessAA && LivenessAA->isAssumedDead(&I)) + continue; + + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (!ICS.hasFnAttr(Attribute::NoSync)) + return false; + + auto *NoSyncAA = A.getAAFor(*this, I); + + /// Function cannot synchronize. + if (NoSyncAA && !NoSyncAA->isAssumedNoSync()) + return false; + } + + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) { + for (auto *Ptr : Ptrs) + // TODO: add an OBB cache to the capturing query & provide DT. 
+ if (PointerMayBeCapturedBefore(Ptr, /* ReturnCaptures */ true, + /* StoreCaptures */ true, &I, nullptr, + /* IncludeI */ true)) { + LLVM_DEBUG(dbgs() << "H2S: Pointer: " << *Ptr + << " from malloc call: " << *MallocCall + << " captured before, or at, " + "the potential exit at: " + << I << "\n"); + LLVM_DEBUG(dbgs() << "H2S: Bad malloc call: " << *MallocCall + << " found potential exit at: " << I << "\n"); + return false; + } + } + } + + return true; +} + +ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) { + Function &F = getAnchorScope(); + SmallVector BadMallocCalls; + bool BadMalloc = true; + + for (auto *MallocCall : MallocCalls) { + if (FreesForMalloc[MallocCall].empty()) { + BadMallocCalls.push_back(MallocCall); + LLVM_DEBUG(dbgs() << "H2S: Bad malloc call (no frees): " << *MallocCall + << "\n"); + continue; + } + + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(F); + std::set Ptrs; + Ptrs.insert(MallocCall); + + BasicBlock *CurrBB = MallocCall->getParent(); + SmallVector WorkList; + + WorkList.push_back(CurrBB); + + // Check all the paths. If we find one path that doesn't free or an exit + // that captures the pointer, we are done. + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + + // Starting point + Instruction *I = MallocCall->getParent() == BB ? 
MallocCall->getNextNode() + : &BB->front(); + + if (!checkMallocCall(A, MallocCall, I, Ptrs, TLI)) { + BadMalloc = true; + if (!checkUnexpectedExit(A, MallocCall, BB, Ptrs)) + break; + + for (auto *SuccBB : successors(BB)) { + for (PHINode &PN : SuccBB->phis()) + if (Ptrs.count((Instruction *)PN.getIncomingValueForBlock(CurrBB))) + Ptrs.insert(&PN); + WorkList.push_back(SuccBB); + } + continue; + } + + BadMalloc = false; + } + + if (BadMalloc) + BadMallocCalls.push_back(MallocCall); + } + + for (auto *BadMallocCall : BadMallocCalls) { + MallocCalls.erase(BadMallocCall); + for (auto *FreeCall : FreesForMalloc[BadMallocCall]) + FreeCalls.erase(FreeCall); + } + + return ChangeStatus::UNCHANGED; +} + +struct AAHeapToStackFunction final : public AAHeapToStackImpl { + AAHeapToStackFunction(Function &F) : AAHeapToStackImpl(F, IRP_FUNCTION) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECL(MallocCalls, Function, + "Number of MallocCalls"); + BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size(); + STATS_DECL(FreeCalls, Function, + "Number of FreeCalls"); + BUILD_STAT_NAME(FreeCalls, Function) += FreeCalls.size(); + } +}; + /// ---------------------------------------------------------------------------- /// Attributor /// ---------------------------------------------------------------------------- @@ -2349,7 +2648,14 @@ } void Attributor::identifyDefaultAbstractAttributes( - Function &F, DenseSet *Whitelist) { + Function &F, + std::function &TLIGetter, + DenseSet *Whitelist) { + + if (!F.isDeclaration()) + InfoCache.FuncTLIMap[&F] = &TLIGetter(F); + else + LLVM_DEBUG(dbgs() << "DECLARATION DOESN'T HAVE TLI\n"); // Check for dead BasicBlocks in every function. // We need dead instruction detection because we do not want to deal with @@ -2411,6 +2717,10 @@ } } + // Every function might be applicable for Heap-To-Stack conversion. 
+ if (EnableHeapToStack) + checkAndRegisterAA(F, *this, nullptr, F, -1); + // Walk all instructions to find more attribute opportunities and also // interesting instructions that might be queried by abstract attributes // during their initialization or update. @@ -2519,7 +2829,8 @@ /// Pass (Manager) Boilerplate /// ---------------------------------------------------------------------------- -static bool runAttributorOnModule(Module &M) { +static bool runAttributorOnModule( + Module &M, std::function &TLIGetter) { if (DisableAttributor) return false; @@ -2553,14 +2864,23 @@ // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. - A.identifyDefaultAbstractAttributes(F); + A.identifyDefaultAbstractAttributes(F, TLIGetter); } return A.run() == ChangeStatus::CHANGED; } + + PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { - if (runAttributorOnModule(M)) { + auto &FAM = AM.getResult(M).getManager(); + + std::function TLIGetter = + [&](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + + if (runAttributorOnModule(M, TLIGetter)) { // FIXME: Think about passes we will preserve and add them here. return PreservedAnalyses::none(); } @@ -2579,11 +2899,19 @@ bool runOnModule(Module &M) override { if (skipModule(M)) return false; - return runAttributorOnModule(M); + + std::function TLIGetter = + [&](Function &F) -> TargetLibraryInfo & { + return getAnalysis(F).getTLI(); + }; + + return runAttributorOnModule(M, TLIGetter); } void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: Think about passes we will preserve and add them here. 
+ ModulePass::getAnalysisUsage(AU); + AU.addRequired(); AU.setPreservesCFG(); } }; @@ -2606,8 +2934,10 @@ const char AAIsDead::ID = 0; const char AADereferenceable::ID = 0; const char AAAlign::ID = 0; +const char AAHeapToStack::ID = 0; INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) diff --git a/llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll b/llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll @@ -0,0 +1,120 @@ +; RUN: opt -passes=attributor --attributor-disable=false -enable-heap-to-stack-conversion=true -S < %s | FileCheck %s + +declare noalias i8* @malloc(i64) + +declare void @nocapture_func_frees_pointer(i8* nocapture) + +declare void @func_throws(...) + +declare void @sync_func(i8* %p) + +declare void @no_sync_func(i8* %p) nofree nosync willreturn + +declare void @nofree_func(i8* %p) nofree nosync willreturn + +declare i32 @no_return_call() noreturn + +declare void @free(i8* nocapture) + +; TEST 1 - negative, pointer freed in another function. + +define void @test1() { + %1 = tail call noalias i8* @malloc(i64 4) + ; CHECK: @malloc(i64 4) + ; CHECK-NEXT: @nocapture_func_frees_pointer(i8* %1) + tail call void @nocapture_func_frees_pointer(i8* %1) + tail call void (...) @func_throws() + tail call void @free(i8* %1) + ret void +} + +; TEST 2 - negative, call to a sync function. 
+ +define void @test2() { + %1 = tail call noalias i8* @malloc(i64 4) + ; CHECK: @malloc(i64 4) + ; CHECK-NEXT: @sync_func(i8* %1) + tail call void @sync_func(i8* %1) + tail call void @free(i8* %1) + ret void +} + +; TEST 3 - 1 malloc, 1 free + +define void @test3() { + %1 = tail call noalias i8* @malloc(i64 4) + ; CHECK: %1 = alloca i8, i64 4 + ; CHECK-NEXT: @no_sync_func(i8* %1) + tail call void @no_sync_func(i8* %1) + ; CHECK-NOT: @free(i8* %1) + tail call void @free(i8* %1) + ret void +} + +; TEST 4 - negative, no call to free + +define void @test4() { + %1 = tail call noalias i8* @malloc(i64 4) + ; CHECK: @malloc(i64 4) + ; CHECK-NEXT: @nofree_func(i8* %1) + tail call void @nofree_func(i8* %1) + ret void +} + +; TEST 5 - not all exit paths have a call to free + +define void @test5(i32) { + %2 = tail call noalias i8* @malloc(i64 4) + ; CHECK: @malloc(i64 4) + ; CHECK-NEXT: icmp eq i32 %0, 0 + %3 = icmp eq i32 %0, 0 + br i1 %3, label %5, label %4 + +4: ; preds = %1 + tail call void @nofree_func(i8* %2) + br label %6 + +5: ; preds = %1 + tail call void @free(i8* %2) + ; CHECK: @free(i8* %2) + br label %6 + +6: ; preds = %5, %4 + ret void +} + +; TEST 6 - all exit paths have a call to free + +define void @test6(i32) { + %2 = tail call noalias i8* @malloc(i64 4) + ; CHECK: %2 = alloca i8, i64 4 + ; CHECK-NEXT: icmp eq i32 %0, 0 + %3 = icmp eq i32 %0, 0 + br i1 %3, label %5, label %4 + +4: ; preds = %1 + tail call void @nofree_func(i8* %2) + tail call void @free(i8* %2) + ; CHECK-NOT: @free(i8* %2) + br label %5 + +5: ; preds = %1 + tail call void @free(i8* %2) + ; CHECK-NOT: @free(i8* %2) + br label %6 + +6: ; preds = %5, %4 + ret void +} + +; TEST 7 - free is dead. + +define void @test7() { + %1 = tail call noalias i8* @malloc(i64 4) + ; CHECK: @malloc(i64 4) + ; CHECK-NEXT: call i32 @no_return_call() + tail call i32 @no_return_call() + ; this free is dead. So malloc cannot be transformed. + tail call void @free(i8* %1) + ret void +}