Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -845,6 +845,17 @@ // If we bailed early, conservatively return found. if (MaxSteps != 0 && Visited.size() >= MaxSteps) return true; + #ifdef EXPENSIVE_CHECKS + // If we've we failed to find the node verify that topological pruning + // was correct by checking through deferred nodes. + if (TopologicalPrune && !Found) { + SmallPtrSet VisitedCheck(Visited.begin(), Visited.end()); + SmallVector WorklistCheck(Worklist.begin(),Worklist.end()); + //Do not give up. + bool Check = hasPredecessorHelper(N, VisitedCheck, WorklistCheck, 0, false); + assert(!Check && "Topological pruning erroneously caused us to miss finding node"); + } + #endif return Found; } Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2073,6 +2073,26 @@ return true; } +// Node fusions where some output edges come from a non-root node may result +// in child nodes violating the topological ordering of node ids. This helper +// invalidate the id (set to -1) a node (and any similarly recursively) if +// they have a node id less than Id. +static void InvalidateNodeIds(SDNode *Node, int Id) { + if (Id <= 0) + return; + SmallVector Nodes; + Nodes.push_back(Node); + while (!Nodes.empty()) { + SDNode *N = Nodes.pop_back_val(); + int NId = N->getNodeId(); + if (NId > 0 && NId < Id) { + N->setNodeId(-1); + for (auto *U : N->uses()) + Nodes.push_back(U); + } + } +} + /// Check whether or not the chain ending in StoreNode is suitable for doing /// the {load; op; store} to modify transformation. static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, @@ -2416,6 +2436,34 @@ // Update Load Chain uses as well. ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); + + // As discussed in isFusableLoadOpStorePattern, We induce + // dependences between Xn and Zn. While we have verified the merge + // is safe, this induced dependence may violate the topological + // ordering of node ids (i.e. an Xn may have a id larger than Zn). + // This may cause us to erroneously curtail a topological cycle checking + // search. Relabeling nodes is not viable in general so we resolve this + // by replacing all violating successor nodes id's (Zns and their successors + // with an id less than the largest Xn with an invalid topological id. + + // Calculate a lowerbound on the maximum Xn id. Check StoreNode's Chain's id. + // If it is invalid, find maximum of its operands. + + std::function getId = [&](SDValue N) -> int { + auto NId = N->getNodeId(); + if (!(NId > 0)) + for (const SDValue &Op : N->op_values()) + NId = std::max(NId, getId(Op)); + return NId; + }; + + int EffectiveId = getId(StoreNode->getOperand(0)); + for (auto UI = StoredVal->use_begin(), E = StoredVal->use_end(); UI != E; + ++UI) { + if (UI.getUse().getResNo() == 1) + InvalidateNodeIds(*UI, EffectiveId); + } + ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); CurDAG->RemoveDeadNode(Node); return true; Index: llvm/test/CodeGen/X86/pr36312.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/pr36312.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +%struct.anon = type { i32, i32 } + +@c = common global %struct.anon zeroinitializer, align 4 +@d = local_unnamed_addr global %struct.anon* @c, align 8 +@a = common local_unnamed_addr global i32 0, align 4 +@b = common local_unnamed_addr global i32 0, align 4 + +; Function Attrs: norecurse nounwind uwtable +define void @g() local_unnamed_addr #0 { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NEXT: movl 4(%rax), %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: incl {{.*}}(%rip) +; CHECK-NEXT: setne %cl +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl %ecx, {{.*}}(%rip) +; CHECK-NEXT: retq +entry: + %0 = load %struct.anon*, %struct.anon** @d, align 8 + %y = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1 + %1 = load i32, i32* %y, align 4 + %2 = load i32, i32* @b, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* @b, align 4 + %tobool = icmp ne i32 %inc, 0 + %land.ext = zext i1 %tobool to i32 + %add = add nsw i32 %1, %land.ext + store i32 %add, i32* @a, align 4 + ret void +}