Index: include/llvm/Analysis/CGSCCPassManager.h =================================================================== --- include/llvm/Analysis/CGSCCPassManager.h +++ include/llvm/Analysis/CGSCCPassManager.h @@ -89,7 +89,10 @@ #include "llvm/ADT/PriorityWorklist.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" namespace llvm { @@ -453,6 +456,145 @@ return CGSCCToFunctionPassAdaptor(std::move(Pass), DebugLogging); } + +/// A helper that repeats an SCC pass each time an indirect call is refined to +/// a direct call by that pass. +/// +/// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they +/// change shape, we may also want to repeat an SCC pass if it simply refines +/// an indirect call to a direct call, even if doing so does not alter the +/// shape of the graph. +/// +/// This repeatition has the potential to be very large however, as each one +/// might refine a single call site. As a consequence, in practice we use an +/// upper bound on the number of repeatitions to limit things. +template +class DevirtSCCRepeatedPass + : public PassInfoMixin> { +public: + explicit DevirtSCCRepeatedPass(PassT Pass, int MaxRepeatitions, + bool DebugLogging = false) + : Pass(std::move(Pass)), MaxRepeatitions(MaxRepeatitions), + DebugLogging(DebugLogging) {} + // We have to explicitly define all the special member functions because MSVC + // refuses to generate them. + DevirtSCCRepeatedPass(const DevirtSCCRepeatedPass &Arg) + : Pass(Arg.Pass), MaxRepeatitions(Arg.MaxRepeatitions), + DebugLogging(Arg.DebugLogging) {} + DevirtSCCRepeatedPass(DevirtSCCRepeatedPass &&Arg) + : Pass(std::move(Arg.Pass)), MaxRepeatitions(Arg.MaxRepeatitions), + DebugLogging(Arg.DebugLogging) {} + friend void swap(DevirtSCCRepeatedPass &LHS, + DevirtSCCRepeatedPass &RHS) { + using std::swap; + swap(LHS.Pass, RHS.Pass); + swap(LHS.MaxRepeatitions, RHS.MaxRepeatitions); + swap(LHS.DebugLogging, RHS.DebugLogging); + } + DevirtSCCRepeatedPass &operator=(DevirtSCCRepeatedPass RHS) { + swap(*this, RHS); + return *this; + } + + /// Runs the wrapped pass up to \c MaxRepeatitions on the SCC, iterating + /// whenever an indirect call is refined. + PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, + LazyCallGraph &CG, CGSCCUpdateResult &UR) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + // The SCC may be refined while we are running passes over it, so set up + // a pointer that we can update. + LazyCallGraph::SCC *C = &InitialC; + + // Collect value handles for all of the indirect call sites. + SmallVector CallHandles; + SmallPtrSet CallSet; + + for (int i = 0;; ++i) { + // Collect handles for all indirect calls in the SCC. + assert(CallHandles.empty() && "Must start with a clear set of handles."); + for (LazyCallGraph::Node &N : *C) { + CallSet.clear(); + for (Instruction &I : instructions(N.getFunction())) + if (auto CS = CallSite(&I)) + if (!CS.getCalledFunction()) + if (CallSet.insert(&I).second) + CallHandles.push_back(WeakVH(&I)); + } + + PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR); + + // Update the SCC if necessary. + C = UR.UpdatedC ? UR.UpdatedC : C; + + // Check that we didn't miss any update scenario. + assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + + // Update the analysis manager with each run and intersect the total set + // of preserved analyses. + PassPA = AM.invalidate(*C, std::move(PassPA)); + PA.intersect(std::move(PassPA)); + + + // Check whether any of the handles were devirtualized. + bool Devirt = false; + for (WeakVH &CallH : CallHandles) { + if (!CallH) + continue; + auto CS = CallSite(CallH); + if (!CS) + continue; + + // If the call is still indirect, leave it alone. + Function *F = CS.getCalledFunction(); + if (!F) + continue; + + // Otherwise, we no longer track it, but if it is + // not an intrinsic, we've actually devirtualized + // something. + if (!F->isIntrinsic()) { + Devirt = true; + break; + } + } + CallHandles.clear(); + // If nothing got devirtualized, we're done. + if (!Devirt) + break; + // Otherwise, if we've already hit our max, we're done. + if (i >= MaxRepeatitions) { + if (DebugLogging) + dbgs() << "Found another devirtualization after hitting the max " + "number of repeatitions (" + << MaxRepeatitions << ") on SCC: " << *C << "\n"; + break; + } + + if (DebugLogging) + dbgs() << "Repeating an SCC pass after finding a devirtualization in: " + << *C << "\n"; + } + + return PA; + } + +private: + PassT Pass; + int MaxRepeatitions; + bool DebugLogging; +}; + +/// \brief A function to deduce a function pass type and wrap it in the +/// templated adaptor. +template +DevirtSCCRepeatedPass +createDevirtSCCRepeatedPass(PassT Pass, int MaxRepeatitions, + bool DebugLogging = false) { + return DevirtSCCRepeatedPass(std::move(Pass), MaxRepeatitions, + DebugLogging); +} } #endif Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -284,6 +284,15 @@ return Count; } +static Optional parseDevirtPassName(StringRef Name) { + if (!Name.consume_front("devirt<") || !Name.consume_back(">")) + return None; + int Count; + if (Name.getAsInteger(0, Count) || Count <= 0) + return None; + return Count; +} + static bool isModulePassName(StringRef Name) { // Manually handle aliases for pre-configured pipeline fragments. if (Name.startswith("default") || Name.startswith("lto")) @@ -322,6 +331,8 @@ // Explicitly handle custom-parsed pass names. if (parseRepeatPassName(Name)) return true; + if (parseDevirtPassName(Name)) + return true; #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ @@ -562,6 +573,15 @@ CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM))); return true; } + if (auto MaxRepeatitions = parseDevirtPassName(Name)) { + CGSCCPassManager NestedCGPM(DebugLogging); + if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass, + DebugLogging)) + return false; + CGPM.addPass(createDevirtSCCRepeatedPass(std::move(NestedCGPM), + *MaxRepeatitions, DebugLogging)); + return true; + } // Normal passes can't have pipelines. return false; } Index: test/Other/cgscc-devirt-iteration.ll =================================================================== --- /dev/null +++ test/Other/cgscc-devirt-iteration.ll @@ -0,0 +1,81 @@ +; The CGSCC pass manager includes an SCC iteration utility that tracks indirect +; calls that are turned into direct calls (devirtualization) and re-visits the +; SCC to expose those calls to the SCC-based IPO passes. We trigger +; devirtualization here with GVN which forwards a store through a load and to +; an indirect call. +; +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=BEFORE +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<1>(function-attrs,function(gvn)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER1 +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(devirt<2>(function-attrs,function(gvn)))' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER2 + +declare void @readnone() readnone +declare void @unknown() + +; The @test1 function checks that when we refine an indirect call to a direct +; call we revisit the SCC passes to reflect the more precise information. This +; is the basic functionality. + +; BEFORE: define void @test1() { +; AFTER: define void @test1() #0 { +define void @test1() { + %fptr = alloca void ()* + store void ()* @readnone, void ()** %fptr + %f = load void ()*, void ()** %fptr + call void %f() + ret void +} + +; The @test2_* functions check that when we need multiple (in this case 2) +; repeatitions to compute some state that is incrementally exposed with each +; one, the limit on repeatitions is enforced. So we make progress with +; one repeatition but not as much as with three. +; +; This is somewhat awkward to test because we have to contrive to have a state +; repeatition triggered and observed with very few passes. The technique here +; is to have one indirect call that can only be resolved when the entire SCC is +; deduced as readonce, and mark that indirect call at the call site as readonce +; to make that possible. + +declare void @readnone_with_arg(void ()** %ignore) readnone + +define void @test2_a(void ()** %ignore) { +; BEFORE: define void @test2_a(void ()** %ignore) { +; AFTER1: define void @test2_a(void ()** readnone %ignore) #1 { +; AFTER2: define void @test2_a(void ()** readnone %ignore) #0 { +entry: + %f1ptr = alloca void (void ()**)* + store void (void ()**)* @readnone_with_arg, void (void ()**)** %f1ptr + %f1 = load void (void ()**)*, void (void ()**)** %f1ptr + call void %f1(void ()** %ignore) +; BEFORE: call void %f1(void ()** %ignore) +; AFTER1: call void @readnone_with_arg(void ()** %ignore) +; AFTER2: call void @readnone_with_arg(void ()** %ignore) + + ; Bogus call to test2_b to make this a cycle. + call void @test2_b() + + ret void +} + +define void @test2_b() { +; BEFORE: define void @test2_b() { +; AFTER1: define void @test2_b() #1 { +; AFTER2: define void @test2_b() #0 { +entry: + %f2ptr = alloca void ()* + store void ()* @readnone, void ()** %f2ptr + ; Call the other function here to prevent forwarding until the SCC has had + ; function attrs deduced. + call void @test2_a(void ()** %f2ptr) + + %f2 = load void ()*, void ()** %f2ptr + call void %f2() readonly +; BEFORE: call void %f2() +; AFTER1: call void @readnone() +; AFTER2: call void @readnone() + + ret void +} + +; CHECK: attributes #0 = { readnone } +; CHECK: attributes #1 = { readonly } Index: test/Other/cgscc-observe-devirt.ll =================================================================== --- test/Other/cgscc-observe-devirt.ll +++ test/Other/cgscc-observe-devirt.ll @@ -1,130 +1,99 @@ +; Make sure that even without some external devirtualization iteration tool, +; the CGSCC pass manager correctly observes and re-visits SCCs that change +; structure due to devirtualization. We trigger devirtualization here with GVN +; which forwards a store through a load and to an indirect call. +; ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S < %s | FileCheck %s --check-prefix=BEFORE ; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn))' -S < %s | FileCheck %s --check-prefix=AFTER -; -; Also check that adding an extra CGSCC pass after the function update but -; without requiring the outer manager to iterate doesn't break any invariant. -; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER2 declare void @readnone() readnone declare void @unknown() -; The @test1_* functions check that when we refine an indirect call to a direct -; call, even if it doesn't change the call graph structure, we revisit the SCC -; passes to reflect the more precise information. -; FIXME: Currently, this isn't implemented in the new pass manager and so we -; only get this with AFTER2, not with AFTER. - -; BEFORE: define void @test1_a() { -; AFTER: define void @test1_a() { -; AFTER2: define void @test1_a() { -define void @test1_a() { - %fptr = alloca void()* - store void()* @unknown, void()** %fptr - %f = load void()*, void()** %fptr - call void %f() - ret void -} - -; BEFORE: define void @test1_b() { -; AFTER: define void @test1_b() { -; AFTER2: define void @test1_b() #0 { -define void @test1_b() { - %fptr = alloca void()* - store void()* @readnone, void()** %fptr - %f = load void()*, void()** %fptr - call void %f() - ret void -} - -; The @test2_* checks that if we refine an indirect call to a direct call and +; The @test1_* checks that if we refine an indirect call to a direct call and ; in the process change the very structure of the call graph we also revisit ; that component of the graph and do so in an up-to-date fashion. -; BEFORE: define void @test2_a1() { -; AFTER: define void @test2_a1() { -; AFTER2: define void @test2_a1() { -define void @test2_a1() { +; BEFORE: define void @test1_a1() { +; AFTER: define void @test1_a1() { +define void @test1_a1() { %fptr = alloca void()* - store void()* @test2_b2, void()** %fptr - store void()* @test2_b1, void()** %fptr + store void()* @test1_b2, void()** %fptr + store void()* @test1_b1, void()** %fptr %f = load void()*, void()** %fptr call void %f() ret void } -; BEFORE: define void @test2_b1() { -; AFTER: define void @test2_b1() { -; AFTER2: define void @test2_b1() { -define void @test2_b1() { +; BEFORE: define void @test1_b1() { +; AFTER: define void @test1_b1() { +define void @test1_b1() { call void @unknown() - call void @test2_a1() + call void @test1_a1() ret void } -; BEFORE: define void @test2_a2() { -; AFTER: define void @test2_a2() #0 { -; AFTER2: define void @test2_a2() #0 { -define void @test2_a2() { +; BEFORE: define void @test1_a2() { +; AFTER: define void @test1_a2() #0 { +define void @test1_a2() { %fptr = alloca void()* - store void()* @test2_b1, void()** %fptr - store void()* @test2_b2, void()** %fptr + store void()* @test1_b1, void()** %fptr + store void()* @test1_b2, void()** %fptr %f = load void()*, void()** %fptr call void %f() ret void } -; BEFORE: define void @test2_b2() { -; AFTER: define void @test2_b2() #0 { -; AFTER2: define void @test2_b2() #0 { -define void @test2_b2() { +; BEFORE: define void @test1_b2() { +; AFTER: define void @test1_b2() #0 { +define void @test1_b2() { call void @readnone() - call void @test2_a2() + call void @test1_a2() ret void } -; The @test3_* set of functions exercise a case where running function passes +; The @test2_* set of functions exercise a case where running function passes ; introduces a new post-order relationship that was not present originally and ; makes sure we walk across the SCCs in that order. -; CHECK: define void @test3_a() { -define void @test3_a() { - call void @test3_b1() - call void @test3_b2() - call void @test3_b3() +; CHECK: define void @test2_a() { +define void @test2_a() { + call void @test2_b1() + call void @test2_b2() + call void @test2_b3() call void @unknown() ret void } -; CHECK: define void @test3_b1() #0 { -define void @test3_b1() { +; CHECK: define void @test2_b1() #0 { +define void @test2_b1() { %fptr = alloca void()* - store void()* @test3_a, void()** %fptr + store void()* @test2_a, void()** %fptr store void()* @readnone, void()** %fptr %f = load void()*, void()** %fptr call void %f() ret void } -; CHECK: define void @test3_b2() #0 { -define void @test3_b2() { +; CHECK: define void @test2_b2() #0 { +define void @test2_b2() { %fptr = alloca void()* - store void()* @test3_a, void()** %fptr - store void()* @test3_b2, void()** %fptr - store void()* @test3_b3, void()** %fptr - store void()* @test3_b1, void()** %fptr + store void()* @test2_a, void()** %fptr + store void()* @test2_b2, void()** %fptr + store void()* @test2_b3, void()** %fptr + store void()* @test2_b1, void()** %fptr %f = load void()*, void()** %fptr call void %f() ret void } -; CHECK: define void @test3_b3() #0 { -define void @test3_b3() { +; CHECK: define void @test2_b3() #0 { +define void @test2_b3() { %fptr = alloca void()* - store void()* @test3_a, void()** %fptr - store void()* @test3_b2, void()** %fptr - store void()* @test3_b3, void()** %fptr - store void()* @test3_b1, void()** %fptr + store void()* @test2_a, void()** %fptr + store void()* @test2_b2, void()** %fptr + store void()* @test2_b3, void()** %fptr + store void()* @test2_b1, void()** %fptr %f = load void()*, void()** %fptr call void %f() ret void