diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -315,6 +315,10 @@ /// for a better technique. SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> &InlinedInternalEdges; + + /// Used for the inliner pass to request DevirtSCCRepeatedPass to repeat one + /// more iteration. + bool RepeatDevirtSCC; }; /// The core module pass which does a post-order walk of the SCCs and @@ -612,6 +616,7 @@ if (!PI.runBeforePass(Pass, *C)) continue; + UR.RepeatDevirtSCC = false; PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR); if (UR.InvalidatedSCCs.count(C)) @@ -651,7 +656,8 @@ // so iterate to process this devirtualization site. return true; }; - bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); + bool Repeat = UR.RepeatDevirtSCC || + llvm::any_of(CallHandles, IsDevirtualizedHandle); // Rescan to build up a new set of handles and count how many direct // calls remain. If we decide to iterate, this also sets up the input to @@ -664,7 +670,7 @@ // of direct calls for any function in the SCC. This can be fooled by all // manner of transformations such as DCE and other things, but seems to // work well in practice. - if (!Devirt) + if (!Repeat) // Iterate over the keys in NewCallCounts, if Function also exists in // CallCounts, make the check below.
for (auto &Pair : NewCallCounts) { @@ -674,13 +680,13 @@ const auto &CallCountOld = CountIt->second; if (CallCountOld.Indirect > CallCountNew.Indirect && CallCountOld.Direct < CallCountNew.Direct) { - Devirt = true; + Repeat = true; break; } } } - if (!Devirt) { + if (!Repeat) { PA.intersect(std::move(PassPA)); break; } @@ -756,7 +762,9 @@ CGSCCUpdateResult UR = { RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, - nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges}; + nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges, + false + }; // Request PassInstrumentation from analysis manager, will use it to run // instrumenting callbacks for the passes later. diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -120,6 +120,12 @@ " callsites processed by inliner but decided" " to be not inlined")); +static cl::opt<bool> RepeatDevirtSCCForNewIndirectCalls( + "repeat-devirt-scc-for-new-indirect-calls", + cl::init(true), cl::Hidden, + cl::desc("Request to iterate DevirtSCCRepeatedPass one more time when " + "new indirect calls appear after inlining")); + LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) @@ -1097,9 +1103,16 @@ int NewHistoryID = InlineHistory.size(); InlineHistory.push_back({&Callee, InlineHistoryID}); for (CallSite &CS : reverse(IFI.InlinedCallSites)) - if (Function *NewCallee = CS.getCalledFunction()) + if (Function *NewCallee = CS.getCalledFunction()) { if (!NewCallee->isDeclaration()) Calls.push_back({CS, NewHistoryID}); + } else { + // An indirect call appeared after inlining. Request one more + // DevirtSCCRepeatedPass iteration in case it turns into a direct + // call after cleanup.
+ if (RepeatDevirtSCCForNewIndirectCalls) + UR.RepeatDevirtSCC = true; + } } if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) diff --git a/llvm/test/Transforms/Inline/devirtualize-4.ll b/llvm/test/Transforms/Inline/devirtualize-4.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/devirtualize-4.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(devirt<4>(inline,function(instcombine)))' -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O3>' -S | FileCheck %s + +; Check that DoNotOptimize is inlined into Test. +; CHECK: @_Z4Testv() +; CHECK-NOT: ret void +; CHECK: call void asm +; CHECK: ret void + +;template <typename T> +;void DoNotOptimize(const T& var) { +; asm volatile("" : "+m"(const_cast<T&>(var))); +;} + +;class Interface { +; public: +; virtual void Run() = 0; +;}; + +;class Impl : public Interface { +; public: +; Impl() : f(3) {} +; void Run() { DoNotOptimize(this); } +; +; private: +; int f; +;}; + +;static void IndirectRun(Interface& o) { o.Run(); } + +;void Test() { +; Impl o; +; IndirectRun(o); +;} + +%class.Impl = type <{ %class.Interface, i32, [4 x i8] }> +%class.Interface = type { i32 (...)** } + +@_ZTV4Impl = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI4Impl to i8*), i8* bitcast (void (%class.Impl*)* @_ZN4Impl3RunEv to i8*)] }, align 8 +@_ZTVN10__cxxabiv120__si_class_type_infoE = external dso_local global i8* +@_ZTS4Impl = linkonce_odr dso_local constant [6 x i8] c"4Impl\00", align 1 +@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* +@_ZTS9Interface = linkonce_odr dso_local constant [11 x i8] c"9Interface\00", align 1 +@_ZTI9Interface = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9Interface, i32 0, i32 0) }, align 8
+@_ZTI4Impl = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @_ZTS4Impl, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI9Interface to i8*) }, align 8 +@_ZTV9Interface = linkonce_odr dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI9Interface to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)] }, align 8 + +define dso_local void @_Z4Testv() local_unnamed_addr { +entry: + %o = alloca %class.Impl, align 8 + %0 = bitcast %class.Impl* %o to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) + call void @_ZN4ImplC2Ev(%class.Impl* nonnull %o) + %1 = getelementptr inbounds %class.Impl, %class.Impl* %o, i64 0, i32 0 + call fastcc void @_ZL11IndirectRunR9Interface(%class.Interface* nonnull dereferenceable(8) %1) + call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +define linkonce_odr dso_local void @_ZN4ImplC2Ev(%class.Impl* %this) unnamed_addr align 2 { +entry: + %0 = getelementptr %class.Impl, %class.Impl* %this, i64 0, i32 0 + call void @_ZN9InterfaceC2Ev(%class.Interface* %0) + %1 = getelementptr %class.Impl, %class.Impl* %this, i64 0, i32 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV4Impl, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8 + %f = getelementptr inbounds %class.Impl, %class.Impl* %this, i64 0, i32 1 + store i32 3, i32* %f, align 8 + ret void +} + +define internal fastcc void @_ZL11IndirectRunR9Interface(%class.Interface* dereferenceable(8) %o) unnamed_addr { +entry: + %0 = bitcast %class.Interface* %o to void (%class.Interface*)*** + %vtable = load void (%class.Interface*)**, void (%class.Interface*)*** %0, align 8 + %1 = load void (%class.Interface*)*, 
void (%class.Interface*)** %vtable, align 8 + call void %1(%class.Interface* nonnull %o) + ret void +} + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define linkonce_odr dso_local void @_ZN9InterfaceC2Ev(%class.Interface* %this) unnamed_addr align 2 { +entry: + %0 = getelementptr %class.Interface, %class.Interface* %this, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV9Interface, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8 + ret void +} + +define linkonce_odr dso_local void @_ZN4Impl3RunEv(%class.Impl* %this) unnamed_addr align 2 { +entry: + %ref.tmp = alloca %class.Impl*, align 8 + %0 = bitcast %class.Impl** %ref.tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) + store %class.Impl* %this, %class.Impl** %ref.tmp, align 8 + call void @_Z13DoNotOptimizeIP4ImplEvRKT_(%class.Impl** nonnull dereferenceable(8) %ref.tmp) + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) + ret void +} + +declare dso_local void @__cxa_pure_virtual() unnamed_addr + +define linkonce_odr dso_local void @_Z13DoNotOptimizeIP4ImplEvRKT_(%class.Impl** dereferenceable(8) %var) local_unnamed_addr { +entry: + call void asm sideeffect "", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(%class.Impl** nonnull %var, %class.Impl** nonnull %var) + ret void +} +