diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -145,8 +145,6 @@ ~FunctionSpecializer(); - bool isClonedFunction(Function *F) { return Specializations.count(F); } - bool run(); private: diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -39,14 +39,6 @@ class Value; class ValueLatticeElement; -/// Helper struct for bundling up the analysis results per function for IPSCCP. -struct AnalysisResultsForFn { - std::unique_ptr PredInfo; - DominatorTree *DT; - PostDominatorTree *PDT; - LoopInfo *LI; -}; - /// Helper struct shared between Function Specialization and SCCP Solver. struct ArgInfo { Argument *Formal; // The Formal argument being analysed. @@ -82,7 +74,9 @@ ~SCCPSolver(); - void addAnalysis(Function &F, AnalysisResultsForFn A); + void addLoopInfo(Function &F, LoopInfo &LI); + + void addPredicateInfo(Function &F, DominatorTree &DT, AssumptionCache &AC); /// markBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -93,8 +87,6 @@ const LoopInfo &getLoopInfo(Function &F); - DomTreeUpdater getDTU(Function &F); - /// trackValueOfGlobalVariable - Clients can use this method to /// inform the SCCPSolver that it should track loads and stores to the /// specified global variable if it can. This is only legal to call if diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -110,7 +110,8 @@ std::function GetTLI, std::function GetTTI, std::function GetAC, - function_ref getAnalysis, + std::function GetDT, + std::function GetLI, bool IsFuncSpecEnabled) { SCCPSolver Solver(DL, GetTLI, M.getContext()); FunctionSpecializer Specializer(Solver, M, FAM, GetTLI, GetTTI, GetAC); @@ -121,7 +122,12 @@ if (F.isDeclaration()) continue; - Solver.addAnalysis(F, getAnalysis(F)); + DominatorTree &DT = GetDT(F); + AssumptionCache &AC = GetAC(F); + Solver.addPredicateInfo(F, DT, AC); + + if (IsFuncSpecEnabled) + Solver.addLoopInfo(F, GetLI(F)); // Determine if we can track the function's return values. If so, add the // function to the solver's set of return-tracked functions. @@ -222,10 +228,9 @@ BB, InsertedValues, NumInstRemoved, NumInstReplaced); } - DomTreeUpdater DTU = IsFuncSpecEnabled && Specializer.isClonedFunction(&F) - ? DomTreeUpdater(DomTreeUpdater::UpdateStrategy::Lazy) - : Solver.getDTU(F); - + DominatorTree *DT = FAM->getCachedResult(F); + PostDominatorTree *PDT = FAM->getCachedResult(F); + DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy); // Change dead blocks to unreachable. We do it after replacing constants // in all executable blocks, because changeToUnreachable may remove PHI // nodes in executable blocks we found values for. The function's entry @@ -387,15 +392,14 @@ auto GetAC = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - auto getAnalysis = [&FAM, this](Function &F) -> AnalysisResultsForFn { - DominatorTree &DT = FAM.getResult(F); - return { - std::make_unique(F, DT, FAM.getResult(F)), - &DT, FAM.getCachedResult(F), - isFuncSpecEnabled() ? &FAM.getResult(F) : nullptr }; + auto GetDT = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + auto GetLI = [&FAM](Function &F) -> LoopInfo & { + return FAM.getResult(F); }; - if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, getAnalysis, + if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, GetDT, GetLI, isFuncSpecEnabled())) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -386,7 +386,9 @@ using Edge = std::pair; DenseSet KnownFeasibleEdges; - DenseMap AnalysisResults; + DenseMap FnLoopInfo; + DenseMap> FnPredicateInfo; + DenseMap> AdditionalUsers; LLVMContext &Ctx; @@ -649,8 +651,12 @@ void visitInstruction(Instruction &I); public: - void addAnalysis(Function &F, AnalysisResultsForFn A) { - AnalysisResults.insert({&F, std::move(A)}); + void addLoopInfo(Function &F, LoopInfo &LI) { + FnLoopInfo.insert({&F, &LI}); + } + + void addPredicateInfo(Function &F, DominatorTree &DT, AssumptionCache &AC) { + FnPredicateInfo.insert({&F, std::make_unique(F, DT, AC)}); } void visitCallInst(CallInst &I) { visitCallBase(I); } @@ -658,23 +664,17 @@ bool markBlockExecutable(BasicBlock *BB); const PredicateBase *getPredicateInfoFor(Instruction *I) { - auto A = AnalysisResults.find(I->getParent()->getParent()); - if (A == AnalysisResults.end()) + auto It = FnPredicateInfo.find(I->getParent()->getParent()); + if (It == FnPredicateInfo.end()) return nullptr; - return A->second.PredInfo->getPredicateInfoFor(I); + return It->second->getPredicateInfoFor(I); } const LoopInfo &getLoopInfo(Function &F) { - auto A = AnalysisResults.find(&F); - assert(A != AnalysisResults.end() && A->second.LI && + auto It = FnLoopInfo.find(&F); + assert(It != FnLoopInfo.end() && It->second && "Need LoopInfo analysis results for function."); - return *A->second.LI; - } - - DomTreeUpdater getDTU(Function &F) { - auto A = AnalysisResults.find(&F); - assert(A != AnalysisResults.end() && "Need analysis results for function."); - return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy}; + return *It->second; } SCCPInstVisitor(const DataLayout &DL, @@ -1950,8 +1950,13 @@ SCCPSolver::~SCCPSolver() = default; -void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) { - return Visitor->addAnalysis(F, std::move(A)); +void SCCPSolver::addLoopInfo(Function &F, LoopInfo &LI) { + Visitor->addLoopInfo(F, LI); +} + +void SCCPSolver::addPredicateInfo(Function &F, DominatorTree &DT, + AssumptionCache &AC) { + Visitor->addPredicateInfo(F, DT, AC); } bool SCCPSolver::markBlockExecutable(BasicBlock *BB) { @@ -1966,8 +1971,6 @@ return Visitor->getLoopInfo(F); } -DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); } - void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) { Visitor->trackValueOfGlobalVariable(GV); } diff --git a/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll b/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll @@ -0,0 +1,82 @@ +; RUN: opt -passes="ipsccp,print" -force-specialization -funcspec-max-iters=2 -funcspec-max-clones=1 -funcspec-for-literal-constant=true -S < %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +; This test case is trying to validate that the postdomtree is preserved +; correctly by the ipsccp pass. A tricky bug was introduced in commit +; 1b1232047e83b69561 when PDT would be feched using getCachedAnalysis in order +; to setup a DomTreeUpdater (to update the PDT during transformation in order +; to preserve the analysis). But given that commit the PDT could end up being +; required and calculated via BlockFrequency analysis. So the problem was that +; when setting up the DomTreeUpdater we used a nullptr in case PDT wasn't +; cached at the begininng of IPSCCP, to indicate that no updates where needed +; for PDT. But then the PDT was calculated, given the input IR, and preserved +; using the non-updated state (as the DTU wasn't configured for updating the +; PDT). + +; CHECK-NOT: +; CHECK: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-NEXT: [1] <> {4294967295,4294967295} [0] +; CHECK-NEXT: [2] %for.body {4294967295,4294967295} [1] +; CHECK-NEXT: [2] %if.end4 {4294967295,4294967295} [1] +; CHECK-NEXT: [3] %entry {4294967295,4294967295} [2] +; CHECK-NEXT: [2] %for.cond34 {4294967295,4294967295} [1] +; CHECK-NEXT: [3] %for.cond16 {4294967295,4294967295} [2] +; CHECK-NEXT: Roots: %for.body %for.cond34 +; CHECK-NEXT: PostDominatorTree for function: bar +; CHECK-NOT: + +declare hidden i1 @compare(ptr) align 2 +declare hidden { i8, ptr } @getType(ptr) align 2 + +define internal void @foo(ptr %TLI, ptr %DL, ptr %Ty, ptr %ValueVTs, ptr %Offsets, i64 %StartingOffset) { +entry: + %VT = alloca i64, align 8 + br i1 false, label %if.then, label %if.end4 + +if.then: ; preds = %entry + ret void + +if.end4: ; preds = %entry + %cmp = call zeroext i1 @compare(ptr undef) + br i1 %cmp, label %for.body, label %for.cond16 + +for.body: ; preds = %if.end4 + %add13 = add i64 %StartingOffset, undef + call void @foo(ptr %TLI, ptr %DL, ptr undef, ptr %ValueVTs, ptr %Offsets, i64 %add13) + unreachable + +for.cond16: ; preds = %for.cond34, %if.end4 + %call27 = call { i8, ptr } @getType(ptr %VT) + br label %for.cond34 + +for.cond34: ; preds = %for.body37, %for.cond16 + br i1 undef, label %for.body37, label %for.cond16 + +for.body37: ; preds = %for.cond34 + %tobool39 = icmp ne ptr %Offsets, null + br label %for.cond34 +} + +define hidden { ptr, i32 } @bar(ptr %this) { +entry: + %Offsets = alloca i64, align 8 + %cmp26 = call zeroext i1 @compare(ptr undef) + br i1 %cmp26, label %for.body28, label %for.cond.cleanup27 + +for.cond.cleanup27: ; preds = %entry + ret { ptr, i32 } undef + +for.body28: ; preds = %entry + %call33 = call zeroext i1 @compare(ptr undef) + br i1 %call33, label %if.then34, label %if.end106 + +if.then34: ; preds = %for.body28 + call void @foo(ptr %this, ptr undef, ptr undef, ptr undef, ptr null, i64 0) + unreachable + +if.end106: ; preds = %for.body28 + call void @foo(ptr %this, ptr undef, ptr undef, ptr undef, ptr %Offsets, i64 0) + unreachable +} +