Index: llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp +++ llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp @@ -390,10 +390,10 @@ INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false) PHIExpression *NewGVN::createPHIExpression(Instruction *I) { - BasicBlock *PhiBlock = I->getParent(); + BasicBlock *PHIBlock = I->getParent(); auto *PN = cast(I); - auto *E = new (ExpressionAllocator) - PHIExpression(PN->getNumOperands(), I->getParent()); + auto *E = + new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); E->allocateOperands(ArgRecycler, ExpressionAllocator); E->setType(I->getType()); @@ -408,10 +408,10 @@ std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use &U) -> Value * { - // Don't try to transform self-defined phis + // Don't try to transform self-defined phis. if (U == PN) return PN; - const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock); + const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock); return lookupOperandLeader(U, I, BBE); }); return E; @@ -810,36 +810,50 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, const BasicBlock *B) { auto *E = cast(createPHIExpression(I)); - if (E->op_empty()) { + // We match the semantics of SimplifyPhiNode from InstructionSimplify here. + + // See if all arguaments are the same. + // We track if any were undef because they need special handling. + bool HasUndef = false; + auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { + if (Arg == I) + return false; + if (isa(Arg)) { + HasUndef = true; + return false; + } + return true; + }); + // If we are left with no operands, it's undef + if (Filtered.begin() == Filtered.end()) { DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" << "\n"); E->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); return createConstantExpression(UndefValue::get(I->getType())); } - - Value *AllSameValue = E->getOperand(0); - - // See if all arguments are the same, ignoring undef arguments, because we can - // choose a value that is the same for them. - for (const Value *Arg : E->operands()) - if (Arg != AllSameValue && !isa(Arg)) { - AllSameValue = nullptr; - break; + Value *AllSameValue = *(Filtered.begin()); + ++Filtered.begin(); + // Can't use std::equal here, sadly, because filter.begin moves. + if (llvm::all_of(Filtered, [AllSameValue](const Value *V) { + return V == AllSameValue; + })) { + // In LLVM's non-standard representation of phi nodes, it's possible to have + // phi nodes with cycles (IE dependent on other phis that are .... dependent + // on the original phi node), especially in weird CFG's where some arguments + // are unreachable, or uninitialized along certain paths. This can cause + // infinite loops during evaluation. We work around this by not trying to + // really evaluate them independently, but instead using a variable + // expression to say if one is equivalent to the other. + // We also special case undef, so that if we have an undef, we can't use the + // common value unless it dominates the phi block. + if (HasUndef) { + // Only have to check for instructions + if (Instruction *AllSameInst = dyn_cast(AllSameValue)) + if (!DT->dominates(AllSameInst, I)) + return E; } - if (AllSameValue) { - // It's possible to have phi nodes with cycles (IE dependent on - // other phis that are .... dependent on the original phi node), - // especially in weird CFG's where some arguments are unreachable, or - // uninitialized along certain paths. - // This can cause infinite loops during evaluation (even if you disable - // the recursion below, you will simply ping-pong between congruence - // classes). If a phi node symbolically evaluates to another phi node, - // just leave it alone. If they are really the same, we will still - // eliminate them in favor of each other. - if (isa(AllSameValue)) - return E; NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); @@ -1867,7 +1881,6 @@ SmallVector, 8> DFSStack; }; } - bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. The normal way to eliminate is // to walk the dominator tree in order, keeping track of available @@ -1984,9 +1997,13 @@ Value *Member = C.Val; Use *MemberUse = C.U; - // We ignore void things because we can't get a value from them. - if (Member && Member->getType()->isVoidTy()) - continue; + if (Member) { + // We ignore void things because we can't get a value from them. + // FIXME: We could actually use this to kill dead stores that are + // dominated by equivalent earlier stores. + if (Member->getType()->isVoidTy()) + continue; + } if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -1995,8 +2012,6 @@ << EliminationStack.dfs_back().first << "," << EliminationStack.dfs_back().second << ")\n"); } - if (Member && isa(Member)) - assert(isa(CC->RepLeader)); DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," << MemberDFSOut << ")\n"); @@ -2037,11 +2052,8 @@ continue; Value *Result = EliminationStack.back(); - // Don't replace our existing users with ourselves, and don't replace - // phi node arguments with the result of the same phi node. - // IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef) - if (MemberUse->get() == Result || - (isa(Result) && MemberUse->getUser() == Result)) + // Don't replace our existing users with ourselves. + if (MemberUse->get() == Result) continue; DEBUG(dbgs() << "Found replacement " << *Result << " for " Index: llvm/trunk/test/Transforms/NewGVN/cyclic-phi-handling.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/cyclic-phi-handling.ll +++ llvm/trunk/test/Transforms/NewGVN/cyclic-phi-handling.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(i32 %arg, i32 %arg1, i32 (i32, i32)* %arg2) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label %bb3 +; CHECK: bb3: +; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ %arg1, %bb ], [ [[TMP:%.*]]4, %bb7 ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ %arg, %bb ], [ [[TMP]], %bb7 ] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 %arg2(i32 [[TMP4]], i32 [[TMP]]) +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label %bb7, label %bb8 +; CHECK: bb7: +; CHECK-NEXT: br label %bb3 +; CHECK: bb8: +; CHECK-NEXT: ret void +; +bb: + br label %bb3 + +;; While non-standard, llvm allows mutually dependent phi nodes +;; Ensure we do not infinite loop trying to process them +bb3: ; preds = %bb7, %bb + %tmp = phi i32 [ %arg1, %bb ], [ %tmp4, %bb7 ] + %tmp4 = phi i32 [ %arg, %bb ], [ %tmp, %bb7 ] + %tmp5 = call i32 %arg2(i32 %tmp4, i32 %tmp) + %tmp6 = icmp ne i32 %tmp5, 0 + br i1 %tmp6, label %bb7, label %bb8 + +bb7: ; preds = %bb3 + br label %bb3 + +bb8: ; preds = %bb3 + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/pr31501.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/pr31501.ll +++ llvm/trunk/test/Transforms/NewGVN/pr31501.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +%struct.foo = type { %struct.wombat.28*, %struct.zot, %struct.wombat.28* } +%struct.zot = type { i64 } +%struct.barney = type <{ %struct.wombat.28*, %struct.wibble, %struct.snork, %struct.quux.4, %struct.snork.10, %struct.ham.16*, %struct.wobble.23*, i32, i8, i8, [2 x i8] }> +%struct.wibble = type { %struct.pluto, %struct.bar } +%struct.pluto = type { %struct.quux } +%struct.quux = type { %struct.eggs } +%struct.eggs = type { %struct.zot.0, %struct.widget } +%struct.zot.0 = type { i8*, i8*, i8* } +%struct.widget = type { %struct.barney.1 } +%struct.barney.1 = type { [8 x i8] } +%struct.bar = type { [3 x %struct.widget] } +%struct.snork = type <{ %struct.wobble, %struct.bar.3, [7 x i8] }> +%struct.wobble = type { %struct.wombat } +%struct.wombat = type { %struct.zot.2 } +%struct.zot.2 = type { %struct.zot.0, %struct.ham } +%struct.ham = type { %struct.barney.1 } +%struct.bar.3 = type { i8 } +%struct.quux.4 = type <{ %struct.quux.5, %struct.snork.9, [7 x i8] }> +%struct.quux.5 = type { %struct.widget.6 } +%struct.widget.6 = type { %struct.spam } +%struct.spam = type { %struct.zot.0, %struct.ham.7 } +%struct.ham.7 = type { %struct.barney.8 } +%struct.barney.8 = type { [24 x i8] } +%struct.snork.9 = type { i8 } +%struct.snork.10 = type <{ %struct.foo.11, %struct.spam.15, [7 x i8] }> +%struct.foo.11 = type { %struct.snork.12 } +%struct.snork.12 = type { %struct.wombat.13 } +%struct.wombat.13 = type { %struct.zot.0, %struct.wibble.14 } +%struct.wibble.14 = type { %struct.barney.8 } +%struct.spam.15 = type { i8 } +%struct.ham.16 = type { %struct.pluto.17, %struct.pluto.17 } +%struct.pluto.17 = type { %struct.bar.18 } +%struct.bar.18 = type { %struct.baz*, %struct.zot.20, %struct.barney.22 } +%struct.baz = type { %struct.wibble.19* } +%struct.wibble.19 = type <{ %struct.baz, %struct.wibble.19*, %struct.baz*, i8, [7 x i8] }> +%struct.zot.20 = type { %struct.ham.21 } +%struct.ham.21 = type { %struct.baz } +%struct.barney.22 = type { %struct.blam } +%struct.blam = type { i64 } +%struct.wobble.23 = type { %struct.spam.24, %struct.barney* } +%struct.spam.24 = type { %struct.bar.25, %struct.zot.26* } +%struct.bar.25 = type <{ i32 (...)**, i8, i8 }> +%struct.zot.26 = type { i32 (...)**, i32, %struct.widget.27* } +%struct.widget.27 = type { %struct.zot.26, %struct.zot.26* } +%struct.wombat.28 = type <{ i32 (...)**, i8, i8, [6 x i8] }> + +; Function Attrs: norecurse nounwind ssp uwtable +define weak_odr hidden %struct.foo* @quux(%struct.barney* %arg, %struct.wombat.28* %arg1) local_unnamed_addr #0 align 2 { +; CHECK-LABEL: @quux( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds %struct.barney, %struct.barney* %arg, i64 0, i32 3, i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.spam* [[TMP]] to %struct.foo** +; CHECK-NEXT: [[TMP3:%.*]] = load %struct.foo*, %struct.foo** [[TMP2]], align 8, !tbaa !2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds %struct.barney, %struct.barney* %arg, i64 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to %struct.foo** +; CHECK-NEXT: [[TMP6:%.*]] = load %struct.foo*, %struct.foo** [[TMP5]], align 8, !tbaa !7 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq %struct.foo* [[TMP3]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label %bb21, label %bb8 +; CHECK: bb8: +; CHECK-NEXT: br label %bb11 +; CHECK: bb9: +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq %struct.foo* [[TMP18:%.*]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP10]], label %bb19, label %bb11 +; CHECK: bb11: +; CHECK-NEXT: [[TMP12:%.*]] = phi %struct.foo* [ [[TMP17:%.*]], %bb9 ], [ undef, %bb8 ] +; CHECK-NEXT: [[TMP13:%.*]] = phi %struct.foo* [ [[TMP18]], %bb9 ], [ [[TMP3]], %bb8 ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* [[TMP13]], i64 0, i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = load %struct.wombat.28*, %struct.wombat.28** [[TMP14]], align 8, !tbaa !8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq %struct.wombat.28* [[TMP15]], %arg1 +; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], %struct.foo* [[TMP13]], %struct.foo* [[TMP12]] +; CHECK-NEXT: [[TMP18]] = getelementptr inbounds %struct.foo, %struct.foo* [[TMP13]], i64 1 +; CHECK-NEXT: br i1 [[TMP16]], label %bb19, label %bb9 +; CHECK: bb19: +; CHECK-NEXT: [[TMP20:%.*]] = phi %struct.foo* [ null, %bb9 ], [ [[TMP17]], %bb11 ] +; CHECK-NEXT: br label %bb21 +; CHECK: bb21: +; CHECK-NEXT: [[TMP22:%.*]] = phi %struct.foo* [ null, %bb ], [ [[TMP20]], %bb19 ] +; CHECK-NEXT: ret %struct.foo* [[TMP22]] +; +bb: + %tmp = getelementptr inbounds %struct.barney, %struct.barney* %arg, i64 0, i32 3, i32 0, i32 0, i32 0 + %tmp2 = bitcast %struct.spam* %tmp to %struct.foo** + %tmp3 = load %struct.foo*, %struct.foo** %tmp2, align 8, !tbaa !2 + %tmp4 = getelementptr inbounds %struct.barney, %struct.barney* %arg, i64 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 1 + %tmp5 = bitcast i8** %tmp4 to %struct.foo** + %tmp6 = load %struct.foo*, %struct.foo** %tmp5, align 8, !tbaa !7 + %tmp7 = icmp eq %struct.foo* %tmp3, %tmp6 + br i1 %tmp7, label %bb21, label %bb8 + +bb8: ; preds = %bb + br label %bb11 + +bb9: ; preds = %bb11 + %tmp10 = icmp eq %struct.foo* %tmp18, %tmp6 + br i1 %tmp10, label %bb19, label %bb11 + +bb11: ; preds = %bb9, %bb8 + %tmp12 = phi %struct.foo* [ %tmp17, %bb9 ], [ undef, %bb8 ] + %tmp13 = phi %struct.foo* [ %tmp18, %bb9 ], [ %tmp3, %bb8 ] + %tmp14 = getelementptr inbounds %struct.foo, %struct.foo* %tmp13, i64 0, i32 0 + %tmp15 = load %struct.wombat.28*, %struct.wombat.28** %tmp14, align 8, !tbaa !8 + %tmp16 = icmp eq %struct.wombat.28* %tmp15, %arg1 + %tmp17 = select i1 %tmp16, %struct.foo* %tmp13, %struct.foo* %tmp12 + %tmp18 = getelementptr inbounds %struct.foo, %struct.foo* %tmp13, i64 1 + br i1 %tmp16, label %bb19, label %bb9 + +bb19: ; preds = %bb11, %bb9 + %tmp20 = phi %struct.foo* [ null, %bb9 ], [ %tmp17, %bb11 ] + br label %bb21 + +bb21: ; preds = %bb19, %bb + %tmp22 = phi %struct.foo* [ null, %bb ], [ %tmp20, %bb19 ] + ret %struct.foo* %tmp22 +} + +attributes #0 = { norecurse nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git b63fa9e2bb8aac0a80c3e3467991c6b1a4b01e6a) (llvm/trunk 290779)"} +!2 = !{!3, !4, i64 0} +!3 = !{!"_ZTSN4llvm15SmallVectorBaseE", !4, i64 0, !4, i64 8, !4, i64 16} +!4 = !{!"any pointer", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C++ TBAA"} +!7 = !{!3, !4, i64 8} +!8 = !{!9, !4, i64 0} +!9 = !{!"_ZTSN4llvm9RecordValE", !4, i64 0, !10, i64 8, !4, i64 16} +!10 = !{!"_ZTSN4llvm14PointerIntPairIPNS_5RecTyELj1EbNS_21PointerLikeTypeTraitsIS2_EENS_18PointerIntPairInfoIS2_Lj1ES4_EEEE", !11, i64 0} +!11 = !{!"long", !5, i64 0}