Index: lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- lib/Transforms/Scalar/NewGVN.cpp +++ lib/Transforms/Scalar/NewGVN.cpp @@ -334,6 +334,9 @@ // Number of function arguments, used by ranking unsigned int NumFuncArgs; + // RPOOrdering of basic blocks + DenseMap RPOOrdering; + // Congruence class info. // This class is called INITIAL in the paper. It is the class everything @@ -432,7 +435,7 @@ // Expression handling. const Expression *createExpression(Instruction *); const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); - PHIExpression *createPHIExpression(Instruction *); + PHIExpression *createPHIExpression(Instruction *, bool&HasBackedge, bool&AllConstant); const VariableExpression *createVariableExpression(Value *); const ConstantExpression *createConstantExpression(Constant *); const Expression *createVariableOrConstant(Value *V); @@ -627,7 +630,9 @@ ExpressionAllocator.Deallocate(E); } -PHIExpression *NewGVN::createPHIExpression(Instruction *I) { +PHIExpression *NewGVN::createPHIExpression(Instruction *I, + bool &HasBackedge, + bool &AllConstant) { BasicBlock *PHIBlock = I->getParent(); auto *PN = cast(I); auto *E = @@ -637,6 +642,8 @@ E->setType(I->getType()); E->setOpcode(I->getOpcode()); + unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock)); + // Filter out unreachable phi operands. auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); @@ -644,6 +651,12 @@ std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use &U) -> Value * { + auto *BB = PN->getIncomingBlock(U); + auto *DTN = DT->getNode(BB); + if (RPOOrdering.lookup(DTN) >= PHIRPO) + HasBackedge = true; + AllConstant &= isa(U) || isa(U); + // Don't try to transform self-defined phis. if (U == PN) return PN; @@ -1315,7 +1328,14 @@ // Evaluate PHI nodes symbolically, and create an expression result. const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { - auto *E = cast(createPHIExpression(I)); + // True if one of the incoming phi edges is a backedge. + bool HasBackedge = false; + // All constant tracks the state of whether all the *original* phi operands were constant. + // This is really shorthand for "this phi cannot cycle due to forward propagation", as any + // change in value of the phi is guaranteed not to later change the value of the phi. + // IE it can't be v = phi(undef, v+1) + bool AllConstant = true; + auto *E = cast(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. // See if all arguaments are the same. @@ -1337,10 +1357,12 @@ deleteExpression(E); return createConstantExpression(UndefValue::get(I->getType())); } + unsigned NumOps = 0; Value *AllSameValue = *(Filtered.begin()); ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. - if (llvm::all_of(Filtered, [AllSameValue](const Value *V) { + if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) { + ++NumOps; return V == AllSameValue; })) { // In LLVM's non-standard representation of phi nodes, it's possible to have @@ -1353,6 +1375,15 @@ // We also special case undef, so that if we have an undef, we can't use the // common value unless it dominates the phi block. if (HasUndef) { + // If we have undef and at least one other value, this is really a + // multivalued phi. We can't ignore the undef when going over a backedge + // unless the value being propagated *is* undef itself, or the original + // phi operands were all constants. This is because if they are all + // undef, they will not become less undef (and if they did, we will stop + // them here). Note that NumOps does not include the undef itself. + if (!AllConstant && HasBackedge && NumOps > 0 && !isa(AllSameValue)) + return E; + // Only have to check for instructions if (auto *AllSameInst = dyn_cast(AllSameValue)) if (!someEquivalentDominates(AllSameInst, I)) @@ -2559,7 +2590,6 @@ // The dominator tree does guarantee that, for a given dom tree node, it's // parent must occur before it in the RPO ordering. Thus, we only need to sort // the siblings. - DenseMap RPOOrdering; ReversePostOrderTraversal RPOT(&F); unsigned Counter = 0; for (auto &B : RPOT) { @@ -2572,7 +2602,7 @@ auto *Node = DT->getNode(B); if (Node->getChildren().size() > 1) std::sort(Node->begin(), Node->end(), - [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) { + [&](const DomTreeNode *A, const DomTreeNode *B) { return RPOOrdering[A] < RPOOrdering[B]; }); } Index: test/Transforms/NewGVN/basic-cyclic-opt.ll =================================================================== --- test/Transforms/NewGVN/basic-cyclic-opt.ll +++ test/Transforms/NewGVN/basic-cyclic-opt.ll @@ -86,6 +86,7 @@ ; CHECK: bb4: ; CHECK-NEXT: [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB19:%.*]] ] ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP20:%.*]], [[BB19]] ] +; CHECK-NEXT: [[P_0:%.*]] = phi i32 [ undef, [[BB]] ], [ 0, [[BB19]] ] ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB21:%.*]] ; CHECK: bb6: @@ -103,7 +104,7 @@ ; CHECK-NEXT: [[TMP20]] = add nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[BB4]] ; CHECK: bb21: -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 [[P_0]] ; bb: %tmp = getelementptr inbounds i32, i32* %data, i64 3 @@ -142,6 +143,10 @@ br label %bb4 bb21: ; preds = %bb4 +;; We should be able to prove p.0 is 0, but because we do not try to determine +;; whether the phi p.0 is cycle-free (IE not dependent on itself), it takes +;; two passes of newgvn, one to replace the phi with phi(undef, 0), one to +;; simplify it to 0. ret i32 %p.0 } Index: test/Transforms/NewGVN/pr31682.ll =================================================================== --- test/Transforms/NewGVN/pr31682.ll +++ test/Transforms/NewGVN/pr31682.ll @@ -12,8 +12,11 @@ ; CHECK-NEXT: [[TMP:%.*]] = load %struct.foo*, %struct.foo** @global ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: +; CHECK-NEXT: [[TMP3:%.*]] = phi %struct.foo* [ undef, [[BB:%.*]] ], [ [[TMP]], [[BB2]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_FOO:%.*]], %struct.foo* [[TMP3]], i64 0, i32 1 ; CHECK-NEXT: br i1 undef, label [[BB2]], label [[BB7:%.*]] ; CHECK: bb7: +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_FOO]], %struct.foo* [[TMP]], i64 0, i32 1 ; CHECK-NEXT: br label [[BB10:%.*]] ; CHECK: bb10: ; CHECK-NEXT: br label [[BB10]] Index: test/Transforms/NewGVN/pr32607.ll =================================================================== --- /dev/null +++ test/Transforms/NewGVN/pr32607.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -newgvn %s -S -o - | FileCheck %s +define hidden void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: top: +; CHECK-NEXT: br label [[IF:%.*]] +; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[TMP1:%.*]], [[IF]] ], [ undef, [[TOP:%.*]] ] +; CHECK-NEXT: [[TMP1]] = fadd double [[TMP0]], undef +; CHECK-NEXT: br i1 false, label [[L50:%.*]], label [[IF]] +; CHECK: L50: +; CHECK-NEXT: store i8 undef, i8* null +; CHECK-NEXT: ret void +; +top: + %.promoted = load double, double* undef, align 8 + br label %if + +;; This is really a multi-valued phi, because the phi is defined by an expression of the phi. +;; This means that we can't propagate the value over the backedge, because we'll just cycle +;; through every value. + +if: ; preds = %if, %top + %0 = phi double [ %1, %if ], [ %.promoted, %top ] + %1 = fadd double %0, undef + br i1 false, label %L50, label %if + +L50: ; preds = %if + %.lcssa = phi double [ %1, %if ] + store double %.lcssa, double* undef, align 8 + ret void +} +