diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -90,6 +90,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -314,6 +315,16 @@ /// for a better technique. SmallDenseSet, 4> &InlinedInternalEdges; + + /// Weak VHs to keep track of indirect calls for the purposes of detecting + /// devirtualization. + /// + /// This is a map to avoid having duplicate entries. If a Value is + /// deallocated, its corresponding WeakTrackingVH will be nulled out. When + /// checking if a Value is in the map or not, also check if the corresponding + /// WeakTrackingVH is null to avoid issues with a new Value sharing the same + /// address as a deallocated one. + SmallMapVector IndirectVHs; }; /// The core module pass which does a post-order walk of the SCCs and @@ -596,9 +607,6 @@ // a pointer that we can update. LazyCallGraph::SCC *C = &InitialC; - // Collect value handles for all of the indirect call sites. - SmallVector CallHandles; - // Struct to track the counts of direct and indirect calls in each function // of the SCC. struct CallCount { @@ -608,35 +616,37 @@ // Put value handles on all of the indirect calls and return the number of // direct calls for each function in the SCC. - auto ScanSCC = [](LazyCallGraph::SCC &C, - SmallVectorImpl &CallHandles) { - assert(CallHandles.empty() && "Must start with a clear set of handles."); - - SmallDenseMap CallCounts; - CallCount CountLocal = {0, 0}; - for (LazyCallGraph::Node &N : C) { - CallCount &Count = - CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) - .first->second; - for (Instruction &I : instructions(N.getFunction())) - if (auto *CB = dyn_cast(&I)) { - if (CB->getCalledFunction()) { - ++Count.Direct; - } else { - ++Count.Indirect; - CallHandles.push_back(WeakTrackingVH(&I)); - } + auto ScanSCC = + [](LazyCallGraph::SCC &C, + SmallMapVector &CallHandles) { + assert(CallHandles.empty() && + "Must start with a clear set of handles."); + + SmallDenseMap CallCounts; + CallCount CountLocal = {0, 0}; + for (LazyCallGraph::Node &N : C) { + CallCount &Count = + CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) + .first->second; + for (Instruction &I : instructions(N.getFunction())) + if (auto *CB = dyn_cast(&I)) { + if (CB->getCalledFunction()) { + ++Count.Direct; + } else { + ++Count.Indirect; + CallHandles.insert({CB, WeakTrackingVH(CB)}); + } + } } - } - return CallCounts; - }; + return CallCounts; + }; + UR.IndirectVHs.clear(); // Populate the initial call handles and get the initial call counts. - auto CallCounts = ScanSCC(*C, CallHandles); + auto CallCounts = ScanSCC(*C, UR.IndirectVHs); for (int Iteration = 0;; ++Iteration) { - if (!PI.runBeforePass(Pass, *C)) continue; @@ -659,33 +669,22 @@ assert(C->begin() != C->end() && "Cannot have an empty SCC!"); // Check whether any of the handles were devirtualized. - auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { - if (!CallH) - return false; - auto *CB = dyn_cast(CallH); - if (!CB) - return false; - - // If the call is still indirect, leave it alone. - Function *F = CB->getCalledFunction(); - if (!F) - return false; - - LLVM_DEBUG(dbgs() << "Found devirtualized call from " - << CB->getParent()->getParent()->getName() << " to " - << F->getName() << "\n"); - - // We now have a direct call where previously we had an indirect call, - // so iterate to process this devirtualization site. - return true; - }; - bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); + bool Devirt = llvm::any_of(UR.IndirectVHs, [](auto &P) -> bool { + if (P.second) { + CallBase *CB = cast(P.second); + if (CB->getCalledFunction()) { + LLVM_DEBUG(dbgs() << "Found devirtualized call: " << *CB << "\n"); + return true; + } + } + return false; + }); // Rescan to build up a new set of handles and count how many direct // calls remain. If we decide to iterate, this also sets up the input to // the next iteration. - CallHandles.clear(); - auto NewCallCounts = ScanSCC(*C, CallHandles); + UR.IndirectVHs.clear(); + auto NewCallCounts = ScanSCC(*C, UR.IndirectVHs); // If we haven't found an explicit devirtualization already see if we // have decreased the number of indirect calls and increased the number @@ -790,7 +789,8 @@ CGSCCUpdateResult UR = { RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, - nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges}; + nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges, + {}}; // Request PassInstrumentation from analysis manager, will use it to run // instrumenting callbacks for the passes later. diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PassManagerImpl.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -476,9 +477,9 @@ // First walk the function and handle all called functions. We do this first // because if there is a single call edge, whether there are ref edges is // irrelevant. - for (Instruction &I : instructions(F)) - if (auto *CB = dyn_cast(&I)) - if (Function *Callee = CB->getCalledFunction()) + for (Instruction &I : instructions(F)) { + if (auto *CB = dyn_cast(&I)) { + if (Function *Callee = CB->getCalledFunction()) { if (Visited.insert(Callee).second && !Callee->isDeclaration()) { Node *CalleeN = G.lookup(*Callee); if (!CalleeN) { @@ -498,6 +499,17 @@ else if (!E->isCall()) PromotedRefTargets.insert(CalleeN); } + } else { + // We can miss devirtualization if an indirect call is created then + // promoted before updateCGAndAnalysisManagerForPass runs. + auto *Entry = UR.IndirectVHs.find(CB); + if (Entry == UR.IndirectVHs.end()) + UR.IndirectVHs.insert({CB, WeakTrackingVH(CB)}); + else if (!Entry->second) + Entry->second = WeakTrackingVH(CB); + } + } + } // Now walk all references. for (Instruction &I : instructions(F)) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1843,7 +1843,7 @@ if (!Name.consume_front("devirt<") || !Name.consume_back(">")) return None; int Count; - if (Name.getAsInteger(0, Count) || Count <= 0) + if (Name.getAsInteger(0, Count) || Count < 0) return None; return Count; } diff --git a/llvm/test/Transforms/Inline/devirtualize-3.ll b/llvm/test/Transforms/Inline/devirtualize-3.ll --- a/llvm/test/Transforms/Inline/devirtualize-3.ll +++ b/llvm/test/Transforms/Inline/devirtualize-3.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -S -O2 < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -S -passes='default' < %s | FileCheck %s ; PR5009 ; CHECK: define i32 @main() diff --git a/llvm/test/Transforms/Inline/devirtualize-5.ll b/llvm/test/Transforms/Inline/devirtualize-5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/devirtualize-5.ll @@ -0,0 +1,22 @@ +; RUN: opt -abort-on-max-devirt-iterations-reached -passes='cgscc(devirt<1>(inline,instcombine))' -S < %s | FileCheck %s +; RUN: opt -abort-on-max-devirt-iterations-reached -passes='default' -S < %s | FileCheck %s + +define i32 @i() alwaysinline { + ret i32 45 +} + +; CHECK-LABEL: define i32 @main +; CHECK-NEXT: ret i32 45 + +define i32 @main() { + %a = alloca i32 ()* + store i32 ()* @i, i32 ()** %a + %r = call i32 @call(i32 ()** %a) + ret i32 %r +} + +define i32 @call(i32 ()** %a) alwaysinline { + %c = load i32 ()*, i32 ()** %a + %r = call i32 %c() + ret i32 %r +} diff --git a/llvm/test/Transforms/Inline/devirtualize-6.ll b/llvm/test/Transforms/Inline/devirtualize-6.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/devirtualize-6.ll @@ -0,0 +1,18 @@ +; Make sure we don't detect devirtualization on inlining a function with a direct call +; RUN: opt -abort-on-max-devirt-iterations-reached -passes='cgscc(devirt<0>(inline))' -S < %s | FileCheck %s + +define i32 @i() noinline { + ret i32 45 +} + +; CHECK-NOT: call i32 @call() + +define i32 @main() { + %r = call i32 @call() + ret i32 %r +} + +define i32 @call() alwaysinline { + %r = call i32 @i() + ret i32 %r +} diff --git a/llvm/test/Transforms/Inline/devirtualize.ll b/llvm/test/Transforms/Inline/devirtualize.ll --- a/llvm/test/Transforms/Inline/devirtualize.ll +++ b/llvm/test/Transforms/Inline/devirtualize.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -Os < %s | FileCheck %s +; RUN: opt -S -aa-pipeline=basic-aa -passes='default' < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0"