diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1723,17 +1723,21 @@
   bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
                                const AbstractAttribute &QueryingAA,
                                const ArrayRef<unsigned> &Opcodes,
-                               bool CheckBBLivenessOnly = false);
+                               bool CheckBBLivenessOnly = false,
+                               bool CheckPotentiallyDead = false);
 
   /// Check \p Pred on all call-like instructions (=CallBased derived).
   ///
   /// See checkForAllCallLikeInstructions(...) for more information.
   bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
-                                       const AbstractAttribute &QueryingAA) {
+                                       const AbstractAttribute &QueryingAA,
+                                       bool CheckBBLivenessOnly = false,
+                                       bool CheckPotentiallyDead = false) {
     return checkForAllInstructions(Pred, QueryingAA,
                                    {(unsigned)Instruction::Invoke,
                                     (unsigned)Instruction::CallBr,
-                                    (unsigned)Instruction::Call});
+                                    (unsigned)Instruction::Call},
+                                   CheckBBLivenessOnly, CheckPotentiallyDead);
   }
 
   /// Check \p Pred on all Read/Write instructions.
@@ -3484,9 +3488,6 @@
   /// Returns true if HeapToStack conversion is assumed to be possible.
   virtual bool isAssumedHeapToStack(CallBase &CB) const = 0;
 
-  /// Returns true if HeapToStack conversion is known to be possible.
-  virtual bool isKnownHeapToStack(CallBase &CB) const = 0;
-
   /// Create an abstract attribute view for the position \p IRP.
   static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
 
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1023,7 +1023,7 @@
     Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
     function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
     const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
-    bool CheckBBLivenessOnly = false) {
+    bool CheckBBLivenessOnly = false, bool CheckPotentiallyDead = false) {
   for (unsigned Opcode : Opcodes) {
     // Check if we have instructions with this opcode at all first.
     auto *Insts = OpcodeInstMap.lookup(Opcode);
@@ -1032,8 +1032,9 @@
     for (Instruction *I : *Insts) {
       // Skip dead instructions.
-      if (A && A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
-                                CheckBBLivenessOnly))
+      if (A && !CheckPotentiallyDead &&
+          A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
+                           CheckBBLivenessOnly))
         continue;
 
       if (!Pred(*I))
@@ -1046,7 +1047,8 @@
 bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
                                          const AbstractAttribute &QueryingAA,
                                          const ArrayRef<unsigned> &Opcodes,
-                                         bool CheckBBLivenessOnly) {
+                                         bool CheckBBLivenessOnly,
+                                         bool CheckPotentiallyDead) {
   const IRPosition &IRP = QueryingAA.getIRPosition();
 
   // Since we need to provide instructions we have to have an exact definition.
@@ -1057,14 +1059,15 @@
   // TODO: use the function scope once we have call site AAReturnedValues.
   const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
   const auto *LivenessAA =
-      CheckBBLivenessOnly
+      (CheckBBLivenessOnly || CheckPotentiallyDead)
           ? nullptr
           : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
   auto &OpcodeInstMap =
       InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
   if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
-                                   LivenessAA, Opcodes, CheckBBLivenessOnly))
+                                   LivenessAA, Opcodes, CheckBBLivenessOnly,
+                                   CheckPotentiallyDead))
     return false;
 
   return true;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,8 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Constants.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -27,9 +29,12 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/NoFolder.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -4836,23 +4841,116 @@
 };
 
 /// ----------------------- Heap-To-Stack Conversion ---------------------------
-struct AAHeapToStackImpl : public AAHeapToStack {
-  AAHeapToStackImpl(const IRPosition &IRP, Attributor &A)
+struct AAHeapToStackFunction final : public AAHeapToStack {
+
+  struct AllocationInfo {
+    /// The call that allocates the memory.
+    CallBase *CB;
+
+    /// The kind of allocation.
+    const enum {
+      MALLOC,
+      CALLOC,
+      ALIGNED_ALLOC,
+    } Kind;
+
+    /// The library function id for the allocation.
+    LibFunc LibraryFunctionId;
+
+    /// The status wrt. a rewrite.
+    enum {
+      STACK_DUE_TO_USE,
+      STACK_DUE_TO_FREE,
+      INVALID,
+    } Status = STACK_DUE_TO_USE;
+
+    /// Flag to indicate if we encountered a use that might free this
+    /// allocation but which is not in the deallocation infos.
+    bool HasPotentiallyFreeingUnknownUses = false;
+
+    /// The set of free calls that use this allocation.
+    SmallPtrSet<CallBase *, 1> PotentialFreeCalls;
+  };
+
+  struct DeallocationInfo {
+    /// The call that deallocates the memory.
+    CallBase *CB;
+
+    /// Flag to indicate if we don't know all objects this deallocation might
+    /// free.
+    bool MightFreeUnknownObjects = false;
+
+    /// The set of allocation calls that are potentially freed.
+    SmallPtrSet<CallBase *, 1> PotentialAllocationCalls;
+  };
+
+  AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
       : AAHeapToStack(IRP, A) {}
 
+  void initialize(Attributor &A) override {
+    AAHeapToStack::initialize(A);
+
+    const Function *F = getAnchorScope();
+    const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+    auto CallBaseCB = [&](Instruction &I) {
+      CallBase *CB = dyn_cast<CallBase>(&I);
+      if (!CB)
+        return true;
+      if (isFreeCall(CB, TLI)) {
+        DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
+        return true;
+      }
+      bool IsMalloc = isMallocLikeFn(CB, TLI);
+      bool IsAlignedAllocLike = !IsMalloc && isAlignedAllocLikeFn(CB, TLI);
+      bool IsCalloc =
+          !IsMalloc && !IsAlignedAllocLike && isCallocLikeFn(CB, TLI);
+      if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc)
+        return true;
+      auto Kind = IsMalloc ? AllocationInfo::MALLOC
+                           : (IsCalloc ? AllocationInfo::CALLOC
+                                       : AllocationInfo::ALIGNED_ALLOC);
+
+      AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB, Kind};
+      AllocationInfos[CB] = AI;
+      TLI->getLibFunc(*CB, AI->LibraryFunctionId);
+      return true;
+    };
+
+    bool Success = A.checkForAllCallLikeInstructions(
+        CallBaseCB, *this, /* CheckBBLivenessOnly */ false,
+        /* CheckPotentiallyDead */ true);
+    (void)Success;
+    assert(Success && "Did not expect the call base visit callback to fail!");
+  }
+
   const std::string getAsStr() const override {
-    return "[H2S] Mallocs Good/Bad: " + std::to_string(MallocCalls.size()) +
-           "/" + std::to_string(BadMallocCalls.size());
+    unsigned NumH2SMallocs = 0, NumInvalidMallocs = 0;
+    for (const auto &It : AllocationInfos) {
+      if (It.second->Status == AllocationInfo::INVALID)
+        ++NumInvalidMallocs;
+      else
+        ++NumH2SMallocs;
+    }
+    return "[H2S] Mallocs Good/Bad: " + std::to_string(NumH2SMallocs) + "/" +
+           std::to_string(NumInvalidMallocs);
   }
 
-  bool isAssumedHeapToStack(CallBase &CB) const override {
-    return isValidState() && MallocCalls.contains(&CB) &&
-           !BadMallocCalls.count(&CB);
+  /// See AbstractAttribute::trackStatistics().
+  void trackStatistics() const override {
+    STATS_DECL(
+        MallocCalls, Function,
+        "Number of malloc/calloc/aligned_alloc calls converted to allocas");
+    for (auto &It : AllocationInfos)
+      if (It.second->Status != AllocationInfo::INVALID)
+        ++BUILD_STAT_NAME(MallocCalls, Function);
   }
 
-  bool isKnownHeapToStack(CallBase &CB) const override {
-    return isValidState() && MallocCalls.contains(&CB) &&
-           !BadMallocCalls.count(&CB);
+  bool isAssumedHeapToStack(CallBase &CB) const override {
+    if (isValidState())
+      if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
+        return AI->Status != AllocationInfo::INVALID;
+    return false;
   }
 
   ChangeStatus manifest(Attributor &A) override {
@@ -4863,76 +4961,82 @@
     Function *F = getAnchorScope();
     const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
 
-    for (Instruction *MallocCall : MallocCalls) {
-      // This malloc cannot be replaced.
-      if (BadMallocCalls.count(MallocCall))
+    for (auto &It : AllocationInfos) {
+      AllocationInfo &AI = *It.second;
+      if (AI.Status == AllocationInfo::INVALID)
         continue;
 
-      for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
+      for (CallBase *FreeCall : AI.PotentialFreeCalls) {
         LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
         A.deleteAfterManifest(*FreeCall);
         HasChanged = ChangeStatus::CHANGED;
       }
 
-      LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
+      LLVM_DEBUG(dbgs() << "H2S: Removing malloc-like call: " << *AI.CB
                         << "\n");
 
      auto Remark = [&](OptimizationRemark OR) {
        LibFunc IsAllocShared;
-        if (auto *CB = dyn_cast<CallBase>(MallocCall)) {
-          TLI->getLibFunc(*CB, IsAllocShared);
+        if (TLI->getLibFunc(*AI.CB, IsAllocShared))
           if (IsAllocShared == LibFunc___kmpc_alloc_shared)
             return OR << "Moving globalized variable to the stack.";
-        }
         return OR << "Moving memory allocation from the heap to the stack.";
       };
-      A.emitRemark<OptimizationRemark>(MallocCall, "HeapToStack", Remark);
+      A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
 
-      Align Alignment;
       Value *Size;
-      if (isCallocLikeFn(MallocCall, TLI)) {
-        auto *Num = MallocCall->getOperand(0);
-        auto *SizeT = MallocCall->getOperand(1);
-        IRBuilder<> B(MallocCall);
+      Optional<APInt> SizeAPI = getSize(A, *this, AI);
+      if (SizeAPI.hasValue()) {
+        Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
+      } else if (AI.Kind == AllocationInfo::CALLOC) {
+        auto *Num = AI.CB->getOperand(0);
+        auto *SizeT = AI.CB->getOperand(1);
+        IRBuilder<> B(AI.CB);
         Size = B.CreateMul(Num, SizeT, "h2s.calloc.size");
-      } else if (isAlignedAllocLikeFn(MallocCall, TLI)) {
-        Size = MallocCall->getOperand(1);
-        Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0))
-                                   ->getValue()
-                                   .getZExtValue())
-                        .valueOrOne();
+      } else if (AI.Kind == AllocationInfo::ALIGNED_ALLOC) {
+        Size = AI.CB->getOperand(1);
       } else {
-        Size = MallocCall->getOperand(0);
+        Size = AI.CB->getOperand(0);
+      }
+
+      Align Alignment(1);
+      if (AI.Kind == AllocationInfo::ALIGNED_ALLOC) {
+        Optional<APInt> AlignmentAPI =
+            getAPInt(A, *this, *AI.CB->getArgOperand(0));
+        assert(AlignmentAPI.hasValue() &&
+               "Expected an alignment during manifest!");
+        Alignment =
+            max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
       }
 
-      unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
-      Instruction *AI =
+      unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
+      Instruction *Alloca =
           new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
-                         "", MallocCall->getNextNode());
+                         "", AI.CB->getNextNode());
 
-      if (AI->getType() != MallocCall->getType())
-        AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
-                             AI->getNextNode());
+      if (Alloca->getType() != AI.CB->getType())
+        Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
+                                 Alloca->getNextNode());
 
-      A.changeValueAfterManifest(*MallocCall, *AI);
+      A.changeValueAfterManifest(*AI.CB, *Alloca);
 
-      if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
+      if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
         auto *NBB = II->getNormalDest();
-        BranchInst::Create(NBB, MallocCall->getParent());
-        A.deleteAfterManifest(*MallocCall);
+        BranchInst::Create(NBB, AI.CB->getParent());
+        A.deleteAfterManifest(*AI.CB);
       } else {
-        A.deleteAfterManifest(*MallocCall);
+        A.deleteAfterManifest(*AI.CB);
       }
 
       // Zero out the allocated memory if it was a calloc.
-      if (isCallocLikeFn(MallocCall, TLI)) {
-        auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
-                                   AI->getNextNode());
+      if (AI.Kind == AllocationInfo::CALLOC) {
+        auto *BI = new BitCastInst(Alloca, AI.CB->getType(), "calloc_bc",
+                                   Alloca->getNextNode());
         Value *Ops[] = {
             BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
             ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
 
-        Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
+        Type *Tys[] = {BI->getType(), AI.CB->getOperand(0)->getType()};
         Module *M = F->getParent();
         Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
         CallInst::Create(Fn, Ops, "", BI->getNextNode());
@@ -4943,21 +5047,58 @@
     return HasChanged;
   }
 
-  /// Collection of all malloc calls in a function.
-  SmallSetVector<Instruction *, 4> MallocCalls;
+  Optional<APInt> getAPInt(Attributor &A, const AbstractAttribute &AA,
+                           Value &V) {
+    bool UsedAssumedInformation = false;
+    Optional<Value *> SimpleV =
+        A.getAssumedConstant(V, AA, UsedAssumedInformation);
+    if (!SimpleV.hasValue())
+      return APInt(64, 0);
+    if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
+      return CI->getValue();
+    return llvm::None;
+  }
+
+  Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
+                          AllocationInfo &AI) {
+
+    if (AI.Kind == AllocationInfo::MALLOC)
+      return getAPInt(A, AA, *AI.CB->getArgOperand(0));
+
+    if (AI.Kind == AllocationInfo::ALIGNED_ALLOC)
+      // Only if the alignment is also constant we return a size.
+      return getAPInt(A, AA, *AI.CB->getArgOperand(0)).hasValue()
+                 ? getAPInt(A, AA, *AI.CB->getArgOperand(1))
+                 : llvm::None;
+
+    assert(AI.Kind == AllocationInfo::CALLOC &&
+           "Expected only callocs to be left");
+    Optional<APInt> Num = getAPInt(A, AA, *AI.CB->getArgOperand(0));
+    Optional<APInt> Size = getAPInt(A, AA, *AI.CB->getArgOperand(1));
+    if (!Num.hasValue() || !Size.hasValue())
+      return llvm::None;
+    bool Overflow = false;
+    Size = Size.getValue().umul_ov(Num.getValue(), Overflow);
+    return Overflow ? llvm::None : Size;
+  }
 
-  /// Collection of malloc calls that cannot be converted.
-  DenseSet<const Instruction *> BadMallocCalls;
+  /// Collection of all malloc-like calls in a function with associated
+  /// information.
+  DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
 
-  /// A map for each malloc call to the set of associated free calls.
-  DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
+  /// Collection of all free-like calls in a function with associated
+  /// information.
+  DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
 
   ChangeStatus updateImpl(Attributor &A) override;
 };
 
-ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
+ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
+  ChangeStatus Changed = ChangeStatus::UNCHANGED;
   const Function *F = getAnchorScope();
-  const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+  const auto &LivenessAA =
+      A.getAAFor<AAIsDead>(*this, IRPosition::function(*F), DepClassTy::NONE);
 
   MustBeExecutedContextExplorer &Explorer =
       A.getInfoCache().getMustBeExecutedContextExplorer();
@@ -4965,7 +5106,66 @@
   bool StackIsAccessibleByOtherThreads =
       A.getInfoCache().stackIsAccessibleByOtherThreads();
 
-  auto FreeCheck = [&](Instruction &I) {
+  // Flag to ensure we update our deallocation information at most once per
+  // updateImpl call and only if we use the free check reasoning.
+  bool HasUpdatedFrees = false;
+
+  auto UpdateFrees = [&]() {
+    HasUpdatedFrees = true;
+
+    for (auto &It : DeallocationInfos) {
+      DeallocationInfo &DI = *It.second;
+      // For now we cannot use deallocations that have unknown inputs, skip
+      // them.
+      if (DI.MightFreeUnknownObjects)
+        continue;
+
+      // No need to analyze dead calls, ignore them instead.
+      if (A.isAssumedDead(*DI.CB, this, &LivenessAA,
+                          /* CheckBBLivenessOnly */ true))
+        continue;
+
+      // Use the optimistic version to get the freed objects, ignoring dead
+      // branches etc.
+      SmallVector<Value *, 8> Objects;
+      if (!getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
+                                       *this, DI.CB)) {
+        LLVM_DEBUG(
+            dbgs()
+            << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
+        DI.MightFreeUnknownObjects = true;
+        continue;
+      }
+
+      // Check each object explicitly.
+      for (auto *Obj : Objects) {
+        // Free of null and undef can be ignored as no-ops (or UB in the
+        // latter case).
+        if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
+          continue;
+
+        CallBase *ObjCB = dyn_cast<CallBase>(Obj);
+        if (!ObjCB) {
+          LLVM_DEBUG(dbgs()
+                     << "[H2S] Free of a non-call object: " << *Obj << "\n");
+          DI.MightFreeUnknownObjects = true;
+          continue;
+        }
+
+        AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
+        if (!AI) {
+          LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
+                            << "\n");
+          DI.MightFreeUnknownObjects = true;
+          continue;
+        }
+
+        DI.PotentialAllocationCalls.insert(ObjCB);
+      }
+    }
+  };
+
+  auto FreeCheck = [&](AllocationInfo &AI) {
     // If the stack is not accessible by other threads, the "must-free" logic
     // doesn't apply as the pointer could be shared and needs to be placed in
     // "shareable" memory.
@@ -4979,19 +5179,55 @@
         return false;
       }
     }
-    const auto &Frees = FreesForMalloc.lookup(&I);
-    if (Frees.size() != 1) {
+    if (!HasUpdatedFrees)
+      UpdateFrees();
+
+    // TODO: Allow multi exit functions that have different free calls.
+    if (AI.PotentialFreeCalls.size() != 1) {
       LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but "
-                        << Frees.size() << "\n");
+                        << AI.PotentialFreeCalls.size() << "\n");
       return false;
     }
-    Instruction *UniqueFree = *Frees.begin();
-    return Explorer.findInContextOf(UniqueFree, I.getNextNode());
+    CallBase *UniqueFree = *AI.PotentialFreeCalls.begin();
+    DeallocationInfo *DI = DeallocationInfos.lookup(UniqueFree);
+    if (!DI) {
+      LLVM_DEBUG(
+          dbgs() << "[H2S] unique free call was not known as deallocation call "
+                 << *UniqueFree << "\n");
+      return false;
+    }
+    if (DI->MightFreeUnknownObjects) {
+      LLVM_DEBUG(
+          dbgs() << "[H2S] unique free call might free unknown allocations\n");
+      return false;
+    }
+    if (DI->PotentialAllocationCalls.size() > 1) {
+      LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
+                        << DI->PotentialAllocationCalls.size()
+                        << " different allocations\n");
+      return false;
+    }
+    if (*DI->PotentialAllocationCalls.begin() != AI.CB) {
+      LLVM_DEBUG(
+          dbgs()
+          << "[H2S] unique free call not known to free this allocation but "
+          << **DI->PotentialAllocationCalls.begin() << "\n");
+      return false;
+    }
+    Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
+    if (!Explorer.findInContextOf(UniqueFree, CtxI)) {
+      LLVM_DEBUG(
+          dbgs()
+          << "[H2S] unique free call might not be executed with the allocation "
+          << *UniqueFree << "\n");
+      return false;
+    }
+    return true;
   };
 
-  auto UsesCheck = [&](Instruction &I) {
+  auto UsesCheck = [&](AllocationInfo &AI) {
     bool ValidUsesOnly = true;
-    bool MustUse = true;
+
     auto Pred = [&](const Use &U, bool &Follow) -> bool {
       Instruction *UserI = cast<Instruction>(U.getUser());
       if (isa<LoadInst>(UserI))
@@ -5009,15 +5245,8 @@
       if (auto *CB = dyn_cast<CallBase>(UserI)) {
         if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
           return true;
-        // Record malloc.
-        if (isFreeCall(UserI, TLI)) {
-          if (MustUse) {
-            FreesForMalloc[&I].insert(UserI);
-          } else {
-            LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: "
-                              << *UserI << "\n");
-            ValidUsesOnly = false;
-          }
+        if (DeallocationInfos.count(CB)) {
+          AI.PotentialFreeCalls.insert(CB);
           return true;
         }
 
@@ -5032,8 +5261,12 @@
             *this, IRPosition::callsite_argument(*CB, ArgNo),
             DepClassTy::OPTIONAL);
 
-        if (!NoCaptureAA.isAssumedNoCapture() ||
-            !ArgNoFreeAA.isAssumedNoFree()) {
+        bool MaybeCaptured = !NoCaptureAA.isAssumedNoCapture();
+        bool MaybeFreed = !ArgNoFreeAA.isAssumedNoFree();
+        if (MaybeCaptured ||
+            (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
+             MaybeFreed)) {
+          AI.HasPotentiallyFreeingUnknownUses |= MaybeFreed;
 
           // Emit a missed remark if this is missed OpenMP globalization.
           auto Remark = [&](OptimizationRemarkMissed ORM) {
@@ -5043,13 +5276,9 @@
                        : "freed.");
           };
 
-          LibFunc IsAllocShared;
-          if (auto *AllocShared = dyn_cast<CallBase>(&I)) {
-            TLI->getLibFunc(*AllocShared, IsAllocShared);
-            if (IsAllocShared == LibFunc___kmpc_alloc_shared)
-              A.emitRemark<OptimizationRemarkMissed>(
-                  AllocShared, "HeapToStackFailed", Remark);
-          }
+          if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
+            A.emitRemark<OptimizationRemarkMissed>(AI.CB, "HeapToStackFailed",
+                                                   Remark);
 
           LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
           ValidUsesOnly = false;
@@ -5059,7 +5288,6 @@
 
       if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
          isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
-        MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
         Follow = true;
         return true;
       }
@@ -5069,96 +5297,64 @@
       ValidUsesOnly = false;
       return true;
     };
-    A.checkForAllUses(Pred, *this, I);
+    A.checkForAllUses(Pred, *this, *AI.CB);
     return ValidUsesOnly;
   };
 
-  auto MallocCallocCheck = [&](Instruction &I) {
-    if (BadMallocCalls.count(&I))
-      return true;
-
-    bool IsMalloc = isMallocLikeFn(&I, TLI);
-    bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI);
-    bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
-    if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) {
-      BadMallocCalls.insert(&I);
-      return true;
-    }
-
-    if (IsMalloc) {
-      if (MaxHeapToStackSize == -1) {
-        if (UsesCheck(I) || FreeCheck(I)) {
-          MallocCalls.insert(&I);
-          return true;
-        }
-      }
-      if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
-        if (Size->getValue().ule(MaxHeapToStackSize))
-          if (UsesCheck(I) || FreeCheck(I)) {
-            MallocCalls.insert(&I);
-            return true;
-          }
-    } else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) {
-      if (MaxHeapToStackSize == -1) {
-        if (UsesCheck(I) || FreeCheck(I)) {
-          MallocCalls.insert(&I);
-          return true;
-        }
-      }
-      // Only if the alignment and sizes are constant.
-      if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
-        if (Size->getValue().ule(MaxHeapToStackSize))
-          if (UsesCheck(I) || FreeCheck(I)) {
-            MallocCalls.insert(&I);
-            return true;
-          }
-    } else if (IsCalloc) {
-      if (MaxHeapToStackSize == -1) {
-        if (UsesCheck(I) || FreeCheck(I)) {
-          MallocCalls.insert(&I);
-          return true;
-        }
-      }
-      bool Overflow = false;
-      if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
-        if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
-          if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
-                  .ule(MaxHeapToStackSize))
-            if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
-              MallocCalls.insert(&I);
-              return true;
-            }
-    }
-
-    BadMallocCalls.insert(&I);
-    return true;
-  };
-
-  size_t NumBadMallocs = BadMallocCalls.size();
-
-  A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
-
-  if (NumBadMallocs != BadMallocCalls.size())
-    return ChangeStatus::CHANGED;
+  // The actual update starts here. We look at all allocations and depending on
+  // their status perform the appropriate check(s).
+  for (auto &It : AllocationInfos) {
+    AllocationInfo &AI = *It.second;
+    if (AI.Status == AllocationInfo::INVALID)
+      continue;
+
+    if (MaxHeapToStackSize == -1) {
+      if (AI.Kind == AllocationInfo::ALIGNED_ALLOC)
+        if (!getAPInt(A, *this, *AI.CB->getArgOperand(0)).hasValue()) {
+          LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
+                            << "\n");
+          AI.Status = AllocationInfo::INVALID;
+          Changed = ChangeStatus::CHANGED;
+          continue;
+        }
+    } else {
+      Optional<APInt> Size = getSize(A, *this, AI);
+      if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
+        LLVM_DEBUG({
+          if (!Size.hasValue())
+            dbgs() << "[H2S] Unknown allocation size (or alignment): " << *AI.CB
+                   << "\n";
+          else
+            dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
+                   << MaxHeapToStackSize << "\n";
+        });
+
+        AI.Status = AllocationInfo::INVALID;
+        Changed = ChangeStatus::CHANGED;
+        continue;
+      }
+    }
+
+    switch (AI.Status) {
+    case AllocationInfo::STACK_DUE_TO_USE:
+      if (UsesCheck(AI))
+        continue;
+      AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
+      LLVM_FALLTHROUGH;
+    case AllocationInfo::STACK_DUE_TO_FREE:
+      if (FreeCheck(AI))
+        continue;
+      AI.Status = AllocationInfo::INVALID;
+      Changed = ChangeStatus::CHANGED;
+      continue;
+    case AllocationInfo::INVALID:
+      llvm_unreachable("Invalid allocations should never reach this point!");
+    };
+  }
 
-  return ChangeStatus::UNCHANGED;
+  return Changed;
 }
 
-struct AAHeapToStackFunction final : public AAHeapToStackImpl {
-  AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
-      : AAHeapToStackImpl(IRP, A) {}
-
-  /// See AbstractAttribute::trackStatistics().
-  void trackStatistics() const override {
-    STATS_DECL(
-        MallocCalls, Function,
-        "Number of malloc/calloc/aligned_alloc calls converted to allocas");
-    for (auto *C : MallocCalls)
-      if (!BadMallocCalls.count(C))
-        ++BUILD_STAT_NAME(MallocCalls, Function);
-  }
-};
-
 /// ----------------------- Privatizable Pointers ------------------------------
 struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
   AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2470,7 +2470,7 @@
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     for (CallBase *CB : MallocCalls) {
       // Skip replacing this if HeapToStack has already claimed it.
-      if (HS && HS->isKnownHeapToStack(*CB))
+      if (HS && HS->isAssumedHeapToStack(*CB))
         continue;
 
       // Find the unique free call to remove it.
diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll
--- a/llvm/test/Transforms/Attributor/depgraph.ll
+++ b/llvm/test/Transforms/Attributor/depgraph.ll
@@ -123,7 +123,7 @@
 ; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
 ; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAHeapToStack] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs Good/Bad: 0/1
+; GRAPH-NEXT: [AAHeapToStack] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs Good/Bad: 0/0
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state not-simple
 ; GRAPH-EMPTY:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -832,6 +832,64 @@
   store i8* %1, i8** %P
   ret void
 }
+
+define void @test17(i1 %c, i8* %A) {
+; IS________OPM-LABEL: define {{[^@]+}}@test17
+; IS________OPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree [[A:%.*]]) {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[M:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; IS________OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; IS________OPM:       t:
+; IS________OPM-NEXT:    br i1 false, label [[DEAD:%.*]], label [[F2:%.*]]
+; IS________OPM:       f:
+; IS________OPM-NEXT:    br label [[J:%.*]]
+; IS________OPM:       f2:
+; IS________OPM-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[M]])
+; IS________OPM-NEXT:    br label [[J]]
+; IS________OPM:       dead:
+; IS________OPM-NEXT:    unreachable
+; IS________OPM:       j:
+; IS________OPM-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[M]])
+; IS________OPM-NEXT:    tail call void @free(i8* nocapture [[M]])
+; IS________OPM-NEXT:    ret void
+;
+; IS________NPM-LABEL: define {{[^@]+}}@test17
+; IS________NPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree [[A:%.*]]) {
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    [[TMP0:%.*]] = alloca i8, i64 4, align 1
+; IS________NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; IS________NPM:       t:
+; IS________NPM-NEXT:    br i1 false, label [[DEAD:%.*]], label [[F2:%.*]]
+; IS________NPM:       f:
+; IS________NPM-NEXT:    br label [[J:%.*]]
+; IS________NPM:       f2:
+; IS________NPM-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP0]])
+; IS________NPM-NEXT:    br label [[J]]
+; IS________NPM:       dead:
+; IS________NPM-NEXT:    unreachable
+; IS________NPM:       j:
+; IS________NPM-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[TMP0]])
+; IS________NPM-NEXT:    ret void
+;
+entry:
+  %add = add i64 2, 2
+  %m = tail call noalias i8* @malloc(i64 %add)
+  br i1 %c, label %t, label %f
+t:
+  br i1 false, label %dead, label %f2
+f:
+  br label %j
+f2:
+  tail call void @no_sync_func(i8* %m)
+  br label %j
+dead:
+  br label %j
+j:
+  %phi = phi i8* [ %m, %f ], [ %m, %f2 ], [ %A, %dead ]
+  tail call void @no_sync_func(i8* %phi)
+  tail call void @free(i8* %phi)
+  ret void
+}
 ;.
 ; IS________OPM: attributes #[[ATTR0:[0-9]+]] = { nounwind willreturn }
 ; IS________OPM: attributes #[[ATTR1:[0-9]+]] = { nofree nosync willreturn }
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -12,8 +12,8 @@
 define void @kernel() {
 ; CHECK-LABEL: define {{[^@]+}}@kernel() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @foo() #[[ATTR0:[0-9]+]]
-; CHECK-NEXT:    call void @bar() #[[ATTR0]]
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -24,7 +24,7 @@
 define internal void @foo() {
 ; CHECK-LABEL: define {{[^@]+}}@foo
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    ret void
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: define {{[^@]+}}@bar
 ; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG6:![0-9]+]]
 ; CHECK-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret void
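The core of this patch is the per-allocation state machine in updateImpl: every allocation starts optimistic (STACK_DUE_TO_USE) and is only ever demoted, first to STACK_DUE_TO_FREE and finally to INVALID. Below is a minimal, self-contained C++ sketch of just that demotion logic; `Status`, `updateOne`, and the two callables are illustrative stand-ins (not patch code) for `AllocationInfo::Status`, the per-allocation loop, and `UsesCheck`/`FreeCheck`.

// Standalone sketch (illustrative names, not the patch's) of the
// AllocationInfo status transitions implemented in updateImpl above.
// The status only ever moves forward: StackDueToUse -> StackDueToFree ->
// Invalid, which keeps the fixpoint iteration monotone.
#include <cassert>
#include <functional>

enum class Status { StackDueToUse, StackDueToFree, Invalid };

// Returns true iff the allocation was invalidated, mirroring where the
// switch in the patch sets Changed = ChangeStatus::CHANGED.
bool updateOne(Status &S, const std::function<bool()> &UsesCheck,
               const std::function<bool()> &FreeCheck) {
  switch (S) {
  case Status::StackDueToUse:
    if (UsesCheck())
      return false;             // All uses are harmless; stay optimistic.
    S = Status::StackDueToFree; // Fall back to the free-based reasoning.
    [[fallthrough]];
  case Status::StackDueToFree:
    if (FreeCheck())
      return false;             // A unique, must-be-executed free was found.
    S = Status::Invalid;        // Neither argument holds; give up.
    return true;
  case Status::Invalid:
    assert(false && "invalid allocations are skipped before the switch");
    return false;
  }
  return false;
}

In the patch itself this switch additionally interacts with the size/alignment pre-checks (which can also invalidate an allocation) and with the `HasUpdatedFrees` caching inside `FreeCheck`; the sketch deliberately leaves those out.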