Index: llvm/trunk/include/llvm/Transforms/IPO/Attributor.h =================================================================== --- llvm/trunk/include/llvm/Transforms/IPO/Attributor.h +++ llvm/trunk/include/llvm/Transforms/IPO/Attributor.h @@ -620,7 +620,7 @@ /// as the Attributor is not destroyed (it owns the attributes now). /// /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. - ChangeStatus run(); + ChangeStatus run(Module &M); /// Lookup an abstract attribute of type \p AAType at position \p IRP. While /// no abstract attribute is found equivalent positions are checked, see @@ -694,6 +694,16 @@ /// various places. void identifyDefaultAbstractAttributes(Function &F); + /// Mark the internal function \p F as live. + /// + /// This will trigger the identification and initialization of attributes for + /// \p F. + void markLiveInternalFunction(const Function &F) { + assert(F.hasInternalLinkage() && + "Only internal linkage is assumed dead initially."); + identifyDefaultAbstractAttributes(const_cast(F)); + } + /// Record that \p I is deleted after information was manifested. void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); } @@ -856,6 +866,9 @@ /// If not null, a set limiting the attribute opportunities. const DenseSet *Whitelist; + /// A set to remember the functions we already assume to be live and visited. + DenseSet VisitedFunctions; + /// Functions, blocks, and instructions we delete after manifest is done. /// ///{ Index: llvm/trunk/lib/Transforms/IPO/Attributor.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/Attributor.cpp +++ llvm/trunk/lib/Transforms/IPO/Attributor.cpp @@ -1796,7 +1796,7 @@ void exploreFromEntry(Attributor &A, const Function *F) { ToBeExploredPaths.insert(&(F->getEntryBlock().front())); - AssumedLiveBlocks.insert(&(F->getEntryBlock())); + assumeLive(A, F->getEntryBlock()); for (size_t i = 0; i < ToBeExploredPaths.size(); ++i) if (const Instruction *NextNoReturnI = @@ -1882,7 +1882,7 @@ } if (SplitPos == &NormalDestBB->front()) - AssumedLiveBlocks.insert(NormalDestBB); + assumeLive(A, *NormalDestBB); } BB = SplitPos->getParent(); @@ -1946,6 +1946,23 @@ return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); } + /// Assume \p BB is (partially) live now and indicate to the Attributor \p A + /// that internal function called from \p BB should now be looked at. + void assumeLive(Attributor &A, const BasicBlock &BB) { + if (!AssumedLiveBlocks.insert(&BB).second) + return; + + // We assume that all of BB is (probably) live now and if there are calls to + // internal functions we will assume that those are now live as well. This + // is a performance optimization for blocks with calls to a lot of internal + // functions. It can however cause dead functions to be treated as live. + for (const Instruction &I : BB) + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) + if (const Function *F = ICS.getCalledFunction()) + if (F->hasInternalLinkage()) + A.markLiveInternalFunction(*F); + } + /// Collection of to be explored paths. SmallSetVector ToBeExploredPaths; @@ -1961,18 +1978,6 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { - STATS_DECL( - DeadInternalFunction, Function, - "Number of internal functions classified as dead (no live callsite)"); - BUILD_STAT_NAME(DeadInternalFunction, Function) += - (getAssociatedFunction()->hasInternalLinkage() && - AssumedLiveBlocks.empty()) - ? 1 - : 0; - STATS_DECL(DeadBlocks, Function, - "Number of basic blocks classified as dead"); - BUILD_STAT_NAME(DeadBlocks, Function) += - getAssociatedFunction()->size() - AssumedLiveBlocks.size(); STATS_DECL(PartiallyDeadBlocks, Function, "Number of basic blocks classified as partially dead"); BUILD_STAT_NAME(PartiallyDeadBlocks, Function) += NoReturnCalls.size(); @@ -2016,7 +2021,7 @@ // Use nounwind to justify the unwind block is dead as well. const auto &AANoUnw = A.getAAFor(*this, IPos); if (!Invoke2CallAllowed || !AANoUnw.isAssumedNoUnwind()) { - AssumedLiveBlocks.insert(Invoke->getUnwindDest()); + assumeLive(A, *Invoke->getUnwindDest()); ToBeExploredPaths.insert(&Invoke->getUnwindDest()->front()); } } @@ -2031,7 +2036,7 @@ // get new paths (reachable blocks). for (const BasicBlock *SuccBB : successors(BB)) { - AssumedLiveBlocks.insert(SuccBB); + assumeLive(A, *SuccBB); ToBeExploredPaths.insert(&SuccBB->front()); } @@ -2040,22 +2045,8 @@ } ChangeStatus AAIsDeadImpl::updateImpl(Attributor &A) { - const Function *F = getAssociatedFunction(); ChangeStatus Status = ChangeStatus::UNCHANGED; - if (F->hasInternalLinkage() && AssumedLiveBlocks.empty()) { - auto CallSiteCheck = [&](CallSite) { return false; }; - - // All callsites of F are dead. - if (A.checkForAllCallSites(CallSiteCheck, *this, true)) - return ChangeStatus::UNCHANGED; - - // There exists at least one live call site, so we explore the function. - Status = ChangeStatus::CHANGED; - - exploreFromEntry(A, F); - } - // Temporary collection to iterate over existing noreturn instructions. This // will alow easier modification of NoReturnCalls collection SmallVector NoReturnChanged; @@ -3099,11 +3090,7 @@ return true; } -ChangeStatus Attributor::run() { - // Initialize all abstract attributes, allow new ones to be created. - for (unsigned u = 0; u < AllAbstractAttributes.size(); u++) - AllAbstractAttributes[u]->initialize(*this); - +ChangeStatus Attributor::run(Module &M) { LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized " << AllAbstractAttributes.size() << " abstract attributes.\n"); @@ -3256,7 +3243,7 @@ if (VerifyAttributor && FinishedAtFixpoint && ManifestChange == ChangeStatus::CHANGED) { VerifyAttributor = false; - ChangeStatus VerifyStatus = run(); + ChangeStatus VerifyStatus = run(M); if (VerifyStatus != ChangeStatus::UNCHANGED) llvm_unreachable( "Attributor verification failed, re-run did result in an IR change " @@ -3274,26 +3261,64 @@ "Expected the final number of abstract attributes to remain unchanged!"); // Delete stuff at the end to avoid invalid references and a nice order. - LLVM_DEBUG(dbgs() << "\n[Attributor] Delete " << ToBeDeletedFunctions.size() - << " functions and " << ToBeDeletedBlocks.size() - << " blocks and " << ToBeDeletedInsts.size() - << " instructions\n"); - for (Instruction *I : ToBeDeletedInsts) { - if (!I->use_empty()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - I->eraseFromParent(); - } - - if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { - SmallVector ToBeDeletedBBs; - ToBeDeletedBBs.reserve(NumDeadBlocks); - ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end()); - DeleteDeadBlocks(ToBeDeletedBBs); - } - - for (Function *Fn : ToBeDeletedFunctions) { - Fn->replaceAllUsesWith(UndefValue::get(Fn->getType())); - Fn->eraseFromParent(); + { + LLVM_DEBUG(dbgs() << "\n[Attributor] Delete at least " + << ToBeDeletedFunctions.size() << " functions and " + << ToBeDeletedBlocks.size() << " blocks and " + << ToBeDeletedInsts.size() << " instructions\n"); + for (Instruction *I : ToBeDeletedInsts) { + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } + + if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { + SmallVector ToBeDeletedBBs; + ToBeDeletedBBs.reserve(NumDeadBlocks); + ToBeDeletedBBs.append(ToBeDeletedBlocks.begin(), ToBeDeletedBlocks.end()); + DeleteDeadBlocks(ToBeDeletedBBs); + STATS_DECLTRACK(AAIsDead, BasicBlock, + "Number of dead basic blocks deleted."); + } + + STATS_DECL(AAIsDead, Function, "Number of dead functions deleted."); + for (Function *Fn : ToBeDeletedFunctions) { + Fn->replaceAllUsesWith(UndefValue::get(Fn->getType())); + Fn->eraseFromParent(); + STATS_TRACK(AAIsDead, Function); + } + + // Identify dead internal functions and delete them. This happens outside + // the other fixpoint analysis as we might treat potentially dead functions + // as live to lower the number of iterations. If they happen to be dead, the + // below fixpoint loop will identify and eliminate them. + SmallVector InternalFns; + for (Function &F : M) + if (F.hasInternalLinkage()) + InternalFns.push_back(&F); + + bool FoundDeadFn = true; + while (FoundDeadFn) { + FoundDeadFn = false; + for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) { + Function *F = InternalFns[u]; + if (!F) + continue; + + const auto *LivenessAA = + lookupAAFor(IRPosition::function(*F)); + if (LivenessAA && + !checkForAllCallSites([](CallSite CS) { return false; }, + *LivenessAA, true)) + continue; + + STATS_TRACK(AAIsDead, Function); + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + InternalFns[u] = nullptr; + FoundDeadFn = true; + } + } } if (VerifyMaxFixpointIterations && @@ -3309,6 +3334,8 @@ } void Attributor::identifyDefaultAbstractAttributes(Function &F) { + if (!VisitedFunctions.insert(&F).second) + return; IRPosition FPos = IRPosition::function(F); @@ -3526,12 +3553,22 @@ F.hasFnAttribute(Attribute::OptimizeNone)) continue; + // We look at internal functions only on-demand but if any use is not a + // direct call, we have to do it eagerly. + if (F.hasInternalLinkage()) { + if (llvm::all_of(F.uses(), [](const Use &U) { + return ImmutableCallSite(U.getUser()) && + ImmutableCallSite(U.getUser()).isCallee(&U); + })) + continue; + } + // Populate the Attributor with abstract attribute opportunities in the // function and the information cache with IR information. A.identifyDefaultAbstractAttributes(F); } - return A.run() == ChangeStatus::CHANGED; + return A.run(M) == ChangeStatus::CHANGED; } PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) { Index: llvm/trunk/test/Transforms/FunctionAttrs/internal-noalias.ll =================================================================== --- llvm/trunk/test/Transforms/FunctionAttrs/internal-noalias.ll +++ llvm/trunk/test/Transforms/FunctionAttrs/internal-noalias.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=8 < %s | FileCheck %s +; RUN: opt -S -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=6 < %s | FileCheck %s define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { entry: Index: llvm/trunk/test/Transforms/FunctionAttrs/liveness.ll =================================================================== --- llvm/trunk/test/Transforms/FunctionAttrs/liveness.ll +++ llvm/trunk/test/Transforms/FunctionAttrs/liveness.ll @@ -1,4 +1,4 @@ -; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 -S < %s | FileCheck %s +; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=11 -S < %s | FileCheck %s declare void @no_return_call() nofree noreturn nounwind readnone @@ -15,7 +15,7 @@ ; This internal function has no live call sites, so all its BBs are considered dead, ; and nothing should be deduced for it. -; FIXME-NOT: define internal i32 @dead_internal_func(i32 %0) +; CHECK-NOT: define internal i32 @dead_internal_func(i32 %0) define internal i32 @dead_internal_func(i32 %0) { %2 = icmp slt i32 %0, 1 br i1 %2, label %3, label %5 @@ -417,3 +417,249 @@ unreachable } +define linkonce_odr void @non_exact1() { + call void @non_dead_a0() + call void @non_dead_a1() + call void @non_dead_a2() + call void @non_dead_a3() + call void @non_dead_a4() + call void @non_dead_a5() + call void @non_dead_a6() + call void @non_dead_a7() + call void @non_dead_a8() + call void @non_dead_a9() + call void @non_dead_a10() + call void @non_dead_a11() + call void @non_dead_a12() + call void @non_dead_a13() + call void @non_dead_a14() + call void @non_dead_a15() + call void @middle() + ret void +} +define internal void @middle() { +bb0: + call void @non_dead_b0() + call void @non_dead_b1() + call void @non_dead_b2() + call void @non_dead_b3() +br label %bb1 +bb1: + call void @non_dead_b4() + call void @non_dead_b5() + call void @non_dead_b6() + call void @non_dead_b7() +br label %bb2 +bb2: + call void @non_dead_b8() + call void @non_dead_b9() + call void @non_dead_b10() + call void @non_dead_b11() +br label %bb3 +bb3: + call void @non_dead_b12() + call void @non_dead_b13() + call void @non_dead_b14() + call void @non_dead_b15() +br label %bb4 +bb4: + call void @non_exact2() + ret void +} +define linkonce_odr void @non_exact2() { + call void @non_dead_c0() + call void @non_dead_c1() + call void @non_dead_c2() + call void @non_dead_c3() + call void @non_dead_c4() + call void @non_dead_c5() + call void @non_dead_c6() + call void @non_dead_c7() + call void @non_dead_c8() + call void @non_dead_c9() + call void @non_dead_c10() + call void @non_dead_c11() + call void @non_dead_c12() + call void @non_dead_c13() + call void @non_dead_c14() + call void @non_dead_c15() + call void @non_exact3() + ret void +} +define linkonce_odr void @non_exact3() { + call void @non_dead_d0() + call void @non_dead_d1() + call void @non_dead_d2() + call void @non_dead_d3() + call void @non_dead_d4() + call void @non_dead_d5() + call void @non_dead_d6() + call void @non_dead_d7() + call void @non_dead_d8() + call void @non_dead_d9() + call void @non_dead_d10() + call void @non_dead_d11() + call void @non_dead_d12() + call void @non_dead_d13() + call void @non_dead_d14() + call void @non_dead_d15() + %nr = call i32 @foo_noreturn() + call void @dead_e1() + ret void +} +; CHECK: define linkonce_odr void @non_exact3() { +; CHECK-NEXT: call void @non_dead_d0() +; CHECK-NEXT: call void @non_dead_d1() +; CHECK-NEXT: call void @non_dead_d2() +; CHECK-NEXT: call void @non_dead_d3() +; CHECK-NEXT: call void @non_dead_d4() +; CHECK-NEXT: call void @non_dead_d5() +; CHECK-NEXT: call void @non_dead_d6() +; CHECK-NEXT: call void @non_dead_d7() +; CHECK-NEXT: call void @non_dead_d8() +; CHECK-NEXT: call void @non_dead_d9() +; CHECK-NEXT: call void @non_dead_d10() +; CHECK-NEXT: call void @non_dead_d11() +; CHECK-NEXT: call void @non_dead_d12() +; CHECK-NEXT: call void @non_dead_d13() +; CHECK-NEXT: call void @non_dead_d14() +; CHECK-NEXT: call void @non_dead_d15() +; CHECK-NEXT: %nr = call i32 @foo_noreturn() +; CHECK-NEXT: unreachable +; CHECK-NEXT: } + +define internal void @non_dead_a0() { ret void } +define internal void @non_dead_a1() { ret void } +define internal void @non_dead_a2() { ret void } +define internal void @non_dead_a3() { ret void } +define internal void @non_dead_a4() { ret void } +define internal void @non_dead_a5() { ret void } +define internal void @non_dead_a6() { ret void } +define internal void @non_dead_a7() { ret void } +define internal void @non_dead_a8() { ret void } +define internal void @non_dead_a9() { ret void } +define internal void @non_dead_a10() { ret void } +define internal void @non_dead_a11() { ret void } +define internal void @non_dead_a12() { ret void } +define internal void @non_dead_a13() { ret void } +define internal void @non_dead_a14() { ret void } +define internal void @non_dead_a15() { ret void } +define internal void @non_dead_b0() { ret void } +define internal void @non_dead_b1() { ret void } +define internal void @non_dead_b2() { ret void } +define internal void @non_dead_b3() { ret void } +define internal void @non_dead_b4() { ret void } +define internal void @non_dead_b5() { ret void } +define internal void @non_dead_b6() { ret void } +define internal void @non_dead_b7() { ret void } +define internal void @non_dead_b8() { ret void } +define internal void @non_dead_b9() { ret void } +define internal void @non_dead_b10() { ret void } +define internal void @non_dead_b11() { ret void } +define internal void @non_dead_b12() { ret void } +define internal void @non_dead_b13() { ret void } +define internal void @non_dead_b14() { ret void } +define internal void @non_dead_b15() { ret void } +define internal void @non_dead_c0() { ret void } +define internal void @non_dead_c1() { ret void } +define internal void @non_dead_c2() { ret void } +define internal void @non_dead_c3() { ret void } +define internal void @non_dead_c4() { ret void } +define internal void @non_dead_c5() { ret void } +define internal void @non_dead_c6() { ret void } +define internal void @non_dead_c7() { ret void } +define internal void @non_dead_c8() { ret void } +define internal void @non_dead_c9() { ret void } +define internal void @non_dead_c10() { ret void } +define internal void @non_dead_c11() { ret void } +define internal void @non_dead_c12() { ret void } +define internal void @non_dead_c13() { ret void } +define internal void @non_dead_c14() { ret void } +define internal void @non_dead_c15() { ret void } +define internal void @non_dead_d0() { ret void } +define internal void @non_dead_d1() { ret void } +define internal void @non_dead_d2() { ret void } +define internal void @non_dead_d3() { ret void } +define internal void @non_dead_d4() { ret void } +define internal void @non_dead_d5() { ret void } +define internal void @non_dead_d6() { ret void } +define internal void @non_dead_d7() { ret void } +define internal void @non_dead_d8() { ret void } +define internal void @non_dead_d9() { ret void } +define internal void @non_dead_d10() { ret void } +define internal void @non_dead_d11() { ret void } +define internal void @non_dead_d12() { ret void } +define internal void @non_dead_d13() { ret void } +define internal void @non_dead_d14() { ret void } +define internal void @non_dead_d15() { ret void } +define internal void @dead_e0() { call void @dead_e1() ret void } +define internal void @dead_e1() { call void @dead_e2() ret void } +define internal void @dead_e2() { ret void } + +; CHECK: define internal void @non_dead_a0() +; CHECK: define internal void @non_dead_a1() +; CHECK: define internal void @non_dead_a2() +; CHECK: define internal void @non_dead_a3() +; CHECK: define internal void @non_dead_a4() +; CHECK: define internal void @non_dead_a5() +; CHECK: define internal void @non_dead_a6() +; CHECK: define internal void @non_dead_a7() +; CHECK: define internal void @non_dead_a8() +; CHECK: define internal void @non_dead_a9() +; CHECK: define internal void @non_dead_a10() +; CHECK: define internal void @non_dead_a11() +; CHECK: define internal void @non_dead_a12() +; CHECK: define internal void @non_dead_a13() +; CHECK: define internal void @non_dead_a14() +; CHECK: define internal void @non_dead_a15() +; CHECK: define internal void @non_dead_b0() +; CHECK: define internal void @non_dead_b1() +; CHECK: define internal void @non_dead_b2() +; CHECK: define internal void @non_dead_b3() +; CHECK: define internal void @non_dead_b4() +; CHECK: define internal void @non_dead_b5() +; CHECK: define internal void @non_dead_b6() +; CHECK: define internal void @non_dead_b7() +; CHECK: define internal void @non_dead_b8() +; CHECK: define internal void @non_dead_b9() +; CHECK: define internal void @non_dead_b10() +; CHECK: define internal void @non_dead_b11() +; CHECK: define internal void @non_dead_b12() +; CHECK: define internal void @non_dead_b13() +; CHECK: define internal void @non_dead_b14() +; CHECK: define internal void @non_dead_b15() +; CHECK: define internal void @non_dead_c0() +; CHECK: define internal void @non_dead_c1() +; CHECK: define internal void @non_dead_c2() +; CHECK: define internal void @non_dead_c3() +; CHECK: define internal void @non_dead_c4() +; CHECK: define internal void @non_dead_c5() +; CHECK: define internal void @non_dead_c6() +; CHECK: define internal void @non_dead_c7() +; CHECK: define internal void @non_dead_c8() +; CHECK: define internal void @non_dead_c9() +; CHECK: define internal void @non_dead_c10() +; CHECK: define internal void @non_dead_c11() +; CHECK: define internal void @non_dead_c12() +; CHECK: define internal void @non_dead_c13() +; CHECK: define internal void @non_dead_c14() +; CHECK: define internal void @non_dead_c15() +; CHECK: define internal void @non_dead_d0() +; CHECK: define internal void @non_dead_d1() +; CHECK: define internal void @non_dead_d2() +; CHECK: define internal void @non_dead_d3() +; CHECK: define internal void @non_dead_d4() +; CHECK: define internal void @non_dead_d5() +; CHECK: define internal void @non_dead_d6() +; CHECK: define internal void @non_dead_d7() +; CHECK: define internal void @non_dead_d8() +; CHECK: define internal void @non_dead_d9() +; CHECK: define internal void @non_dead_d10() +; CHECK: define internal void @non_dead_d11() +; CHECK: define internal void @non_dead_d12() +; CHECK: define internal void @non_dead_d13() +; CHECK: define internal void @non_dead_d14() +; Verify we actually deduce information for these functions. +; CHECK: Function Attrs: nofree nosync nounwind willreturn +; CHECK-NEXT: define internal void @non_dead_d15() +; CHECK-NOT: define internal void @dead_e Index: llvm/trunk/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll =================================================================== --- llvm/trunk/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll +++ llvm/trunk/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll @@ -1,4 +1,4 @@ -; RUN: opt -functionattrs -enable-nonnull-arg-prop -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=9 -S < %s | FileCheck %s +; RUN: opt -functionattrs -enable-nonnull-arg-prop -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=6 -S < %s | FileCheck %s ; ; This is an evolved example to stress test SCC parameter attribute propagation. ; The SCC in this test is made up of the following six function, three of which