Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1275,6 +1275,9 @@ InputChain = LoadNode->getChain(); } else if (Chain.getOpcode() == ISD::TokenFactor) { SmallVector ChainOps; + SmallVector LoopWorklist; + SmallPtrSet Visited; + const unsigned int Max = 1024; for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { SDValue Op = Chain.getOperand(i); if (Op == Load.getValue(1)) { @@ -1283,28 +1286,26 @@ ChainOps.push_back(Load.getOperand(0)); continue; } - - // Make sure using Op as part of the chain would not cause a cycle here. - // In theory, we could check whether the chain node is a predecessor of - // the load. But that can be very expensive. Instead visit the uses and - // make sure they all have smaller node id than the load. - int LoadId = LoadNode->getNodeId(); - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = UI->use_end(); UI != UE; ++UI) { - if (UI.getUse().getResNo() != 0) - continue; - if (UI->getNodeId() > LoadId) - return false; - } - + LoopWorklist.push_back(Op.getNode()); ChainOps.push_back(Op); } - if (ChainCheck) + if (ChainCheck) { + // Add the other operand of StoredVal to worklist. + for (SDValue Op : StoredVal->ops()) + if (Op.getNode() != LoadNode) + LoopWorklist.push_back(Op.getNode()); + + // Check if Load is reachable from any of the nodes in the worklist. + if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, + true)) + return false; + // Make a new TokenFactor with all the other input chains except // for the load. InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); + } } if (!ChainCheck) return false; Index: test/CodeGen/SystemZ/int-uadd-12.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/int-uadd-12.ll @@ -0,0 +1,34 @@ +; Test that this test case does not abort after the folding of load -> add -> +; store into an alsi. This folding is suppose to not happen as it would +; introduce a loop in the DAG. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-basicaa -consthoist-gep | FileCheck %s + +@g_295 = external dso_local unnamed_addr global i32, align 4 +@g_672 = external dso_local unnamed_addr global i64, align 8 +@g_1484 = external dso_local global <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, align 2 + +define void @fun() { +; CHECK-LABEL: fun: + +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + store i32 2, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 16) + %tmp = icmp slt i32 undef, 3 + br i1 %tmp, label %bb1, label %bb2 + +bb2: ; preds = %bb1 + %tmp3 = load i32, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 28) + %tmp4 = load i64, i64* @g_672 + %tmp5 = add i64 %tmp4, 1 + store i64 %tmp5, i64* @g_672 + %tmp6 = icmp eq i64 %tmp5, 0 + %tmp7 = zext i1 %tmp6 to i32 + %tmp8 = icmp ult i32 %tmp3, %tmp7 + %tmp9 = zext i1 %tmp8 to i32 + store i32 %tmp9, i32* @g_295 + ret void +} +