Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -63,6 +63,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -81,7 +82,6 @@ #include "llvm/Transforms/Scalar/GVNExpression.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Analysis/MemorySSA.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/VNCoercion.h" #include @@ -133,6 +133,74 @@ } } +// Tarjan's algorithm with Nuutila's improvements +// SCCIterator is actually fairly complex for the simple thing we want. +// It also wants to hand us SCC's that are unrelated to the phi node we ask +// about, and have us process them there or risk redoing work. +// Graph traits over a filter iterator also doesn't work that well here. +struct TarjanSCC { + + TarjanSCC() : Components(1) {} + + void Start(const Instruction *Start) { + if (Root.lookup(Start) == 0) + FindSCC(Start); + } + + const SmallPtrSetImpl &getComponentFor(const Value *V) const { + unsigned ComponentID = ValueToComponent.lookup(V); + + assert(ComponentID > 0 && + "Asking for a component for a value we never processed"); + return Components[ComponentID]; + } + +private: + void FindSCC(const Instruction *I) { + Root[I] = ++DFSNum; + // Store the DFS Number we had before it possibly gets incremented. + unsigned int OurDFS = DFSNum; + // InComponent.erase(I); + for (auto &Op : I->operands()) { + if (auto *InstOp = dyn_cast(Op)) { + if (Root.lookup(Op) == 0) + FindSCC(InstOp); + if (!InComponent.count(Op)) + Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); + } + } + // See if we really were the root, by seeing if we still have our DFSNumber, + // or something lower + if (Root.lookup(I) == OurDFS) { + unsigned ComponentID = Components.size(); + Components.resize(Components.size() + 1); + auto &Component = Components.back(); + Component.insert(I); + DEBUG(dbgs() << "Component root is " << *I << "\n"); + InComponent.insert(I); + ValueToComponent[I] = ComponentID; + while (!Stack.empty() && Root.lookup(Stack.back()) >= OurDFS) { + auto *Member = Stack.back(); + DEBUG(dbgs() << "Component member is " << *Member << "\n"); + Component.insert(Member); + InComponent.insert(Member); + ValueToComponent[Member] = ComponentID; + Stack.pop_back(); + } + } else { + // Part of a component, push to stack + Stack.push_back(I); + } + } + unsigned int DFSNum = 1; + SmallPtrSet InComponent; + DenseMap Root; + SmallVector Stack; + // Store the components as vector of ptr sets, because we need the topo order + // of SCC's, but not individual member order + SmallVector, 8> Components; + DenseMap ValueToComponent; +}; // Congruence classes represent the set of expressions/instructions // that are all the same *during some scope in the function*. // That is, because of the way we perform equality propagation, and @@ -330,10 +398,14 @@ std::unique_ptr PredInfo; BumpPtrAllocator ExpressionAllocator; ArrayRecycler ArgRecycler; + TarjanSCC SCCFinder; // Number of function arguments, used by ranking unsigned int NumFuncArgs; + // RPOOrdering of basic blocks + DenseMap RPOOrdering; + // Congruence class info. // This class is called INITIAL in the paper. It is the class everything @@ -378,6 +450,8 @@ enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; DenseMap MemoryPhiState; + enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle }; + DenseMap PhiCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap; ExpressionClassMap ExpressionToClass; @@ -432,7 +506,8 @@ // Expression handling. const Expression *createExpression(Instruction *); const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); - PHIExpression *createPHIExpression(Instruction *); + PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge, + bool &AllConstant); const VariableExpression *createVariableExpression(Value *); const ConstantExpression *createConstantExpression(Constant *); const Expression *createVariableOrConstant(Value *V); @@ -572,7 +647,7 @@ ? InstrToDFSNum(cast(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } - + bool isCycleFree(const PHINode *PN); template T *getMinDFSOfRange(const Range &) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. @@ -627,7 +702,8 @@ ExpressionAllocator.Deallocate(E); } -PHIExpression *NewGVN::createPHIExpression(Instruction *I) { +PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, + bool &AllConstant) { BasicBlock *PHIBlock = I->getParent(); auto *PN = cast(I); auto *E = @@ -637,6 +713,8 @@ E->setType(I->getType()); E->setOpcode(I->getOpcode()); + unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); + // Filter out unreachable phi operands. auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); @@ -644,6 +722,12 @@ std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use &U) -> Value * { + auto *BB = PN->getIncomingBlock(U); + auto *DTN = DT->getNode(BB); + if (RPOOrdering.lookup(DTN) >= PHIRPO) + HasBackedge = true; + AllConstant &= isa(U) || isa(U); + // Don't try to transform self-defined phis. if (U == PN) return PN; @@ -1312,10 +1396,42 @@ return Changed; } +bool NewGVN::isCycleFree(const PHINode *PN) { + auto PCS = PhiCycleState.lookup(PN); + if (PCS == PCS_Unknown) { + SCCFinder.Start(PN); + auto &SCC = SCCFinder.getComponentFor(PN); + // It's cycle free if it's size 1 or or the SCC is *only* phi nodes. + if (SCC.size() == 1) + PhiCycleState.insert({PN, PCS_CycleFree}); + else { + bool AllPhis = + llvm::all_of(SCC, [](const Value *V) { return isa(V); }); + PCS = AllPhis ? PCS_CycleFree : PCS_Cycle; + for (auto *Member : SCC) + if (auto *MemberPhi = dyn_cast(Member)) + PhiCycleState.insert({MemberPhi, PCS}); + } + } + if (PCS == PCS_Cycle) + return false; + return true; +} // Evaluate PHI nodes symbolically, and create an expression result. const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { - auto *E = cast(createPHIExpression(I)); + // True if one of the incoming phi edges is a backedge. + bool HasBackedge = false; + // All constant tracks the state of whether all the *original* phi operands + // were constant. + // This is really shorthand for "this phi cannot cycle due to forward + // propagation", as any + // change in value of the phi is guaranteed not to later change the value of + // the phi. + // IE it can't be v = phi(undef, v+1) + bool AllConstant = true; + auto *E = + cast(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. // See if all arguaments are the same. @@ -1337,10 +1453,12 @@ deleteExpression(E); return createConstantExpression(UndefValue::get(I->getType())); } + unsigned NumOps = 0; Value *AllSameValue = *(Filtered.begin()); ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. - if (llvm::all_of(Filtered, [AllSameValue](const Value *V) { + if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) { + ++NumOps; return V == AllSameValue; })) { // In LLVM's non-standard representation of phi nodes, it's possible to have @@ -1353,6 +1471,15 @@ // We also special case undef, so that if we have an undef, we can't use the // common value unless it dominates the phi block. if (HasUndef) { + // If we have undef and at least one other value, this is really a + // multivalued phi. + + // We need to know if it's cycle free in order to evaluate whether we can + // ignore the undef. + if (!AllConstant && HasBackedge && NumOps > 0 && + !isa(AllSameValue) && !isCycleFree(cast(I))) + return E; + // Only have to check for instructions if (auto *AllSameInst = dyn_cast(AllSameValue)) if (!someEquivalentDominates(AllSameInst, I)) @@ -2559,7 +2686,6 @@ // The dominator tree does guarantee that, for a given dom tree node, it's // parent must occur before it in the RPO ordering. Thus, we only need to sort // the siblings. - DenseMap RPOOrdering; ReversePostOrderTraversal RPOT(&F); unsigned Counter = 0; for (auto &B : RPOT) { @@ -2572,7 +2698,7 @@ auto *Node = DT->getNode(B); if (Node->getChildren().size() > 1) std::sort(Node->begin(), Node->end(), - [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) { + [&](const DomTreeNode *A, const DomTreeNode *B) { return RPOOrdering[A] < RPOOrdering[B]; }); } Index: test/Transforms/NewGVN/basic-cyclic-opt.ll =================================================================== --- test/Transforms/NewGVN/basic-cyclic-opt.ll +++ test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -142,6 +142,10 @@ br label %bb4 bb21: ; preds = %bb4 +;; We should be able to prove p.0 is 0, but because we do not try to determine +;; whether the phi p.0 is cycle-free (IE not dependent on itself), it takes +;; two passes of newgvn, one to replace the phi with phi(undef, 0), one to +;; simplify it to 0. ret i32 %p.0 } Index: test/Transforms/NewGVN/pr32607.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr32607.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -newgvn %s -S -o - | FileCheck %s +define hidden void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: top: +; CHECK-NEXT: br label [[IF:%.*]] +; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[TMP1:%.*]], [[IF]] ], [ undef, [[TOP:%.*]] ] +; CHECK-NEXT: [[TMP1]] = fadd double [[TMP0]], undef +; CHECK-NEXT: br i1 false, label [[L50:%.*]], label [[IF]] +; CHECK: L50: +; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: ret void +; +top: + %.promoted = load double, double* undef, align 8 + br label %if + +;; This is really a multi-valued phi, because the phi is defined by an expression of the phi. +;; This means that we can't propagate the value over the backedge, because we'll just cycle +;; through every value. + +if: ; preds = %if, %top + %0 = phi double [ %1, %if ], [ %.promoted, %top ] + %1 = fadd double %0, undef + br i1 false, label %L50, label %if + +L50: ; preds = %if + %.lcssa = phi double [ %1, %if ] + store double %.lcssa, double* undef, align 8 + ret void +} +