Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -867,6 +867,14 @@ } for (const SDValue &OpV : M->op_values()) { SDNode *Op = OpV.getNode(); + // If we are adding a glued node, its glued user should be considered a + // predecessor as well to prevent a node merge causing a non-immediate + // use of a glue operand. Walk down all unvisited glue users. + while (auto *GN = Op->getGluedUser()) { + if ((GN == M) || Visited.count(GN)) + break; + Op = GN; + } if (Visited.insert(Op).second) Worklist.push_back(Op); if (Op == N) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8052,10 +8052,39 @@ unsigned DAGSize = 0; // SortedPos tracks the progress of the algorithm. Nodes before it are - // sorted, nodes after it are unsorted. When the algorithm completes - // it is at the end of the list. + // sorted, nodes after it are unsorted. The first NumPendingGlueNodes + // after SortedPos are glue nodes whose direct operands have been scheduled, + // but may still having indirect predecessors via their glue users. These + // pending nodes not necessarily ordered between them. + + int NumPendingGlueNodes = 0; allnodes_iterator SortedPos = allnodes_begin(); + auto placeNodeInSortedPos = [&](SDNode *N) { + auto Q = N->getIterator(); + + if (N->getGluedUser()) { + allnodes_iterator InsertPos = SortedPos; + // Insert node at end of pending Glue + for (int i = 0; i < NumPendingGlueNodes; ++i) + InsertPos++; + + if (Q != InsertPos) + InsertPos = AllNodes.insert(InsertPos, AllNodes.remove(N)); + + // If SortedPos is InsertPos + if (NumPendingGlueNodes == 0) + SortedPos = InsertPos; + // Record we added the node. + NumPendingGlueNodes++; + return; + } + // Non-glued nodes append to SortedPos immediately. + if (Q != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(N)); + SortedPos++; + }; + // Visit all the nodes. Move nodes with no operands to the front of // the list immediately. Annotate nodes that do have operands with their // operand count. Before we do this, the Node Id fields of the nodes @@ -8063,18 +8092,12 @@ // before SortedPos will contain the topological sort index, and the // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { + for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E;) { SDNode *N = &*I++; checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { - // A node with no uses, add it to the result array immediately. - N->setNodeId(DAGSize++); - allnodes_iterator Q(N); - if (Q != SortedPos) - SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); - assert(SortedPos != AllNodes.end() && "Overran node list"); - ++SortedPos; + placeNodeInSortedPos(N); } else { // Temporarily use the Node Id as scratch space for the degree count. N->setNodeId(Degree); @@ -8085,6 +8108,46 @@ // such that by the time the end is reached all nodes will be sorted. for (SDNode &Node : allnodes()) { SDNode *N = &Node; + + // If N is has a glue user, all non-glued nodes have been dealt with, but we + // still have a dependence on another glued node. Since this is very rare we + // explicitly check all subsequent nodes to see if they're a predecessor and + // if so put it at the end of our pending glue nodes and try the next node. + if (N->getGluedUser()) { + assert(N->getIterator() == SortedPos && + "Consider glue nodes only finished with all sorted nodes"); + assert(NumPendingGlueNodes && + "Overran SortedPos without pending glue nodes"); + SmallPtrSet Visited; + SmallVector Worklist; + auto Q = N->getIterator(); + // Search up from bottom of glue chain + SDNode *StartN = N; + while (auto GlueN = StartN->getGluedUser()) + StartN = GlueN; + + Worklist.push_back(StartN); + bool Defer = false; + while (Q != SortedPos) { + SDNode *OtherN = &*Q++; + if (SDNode::hasPredecessorHelper(OtherN, Visited, Worklist)) { + Defer = true; + break; + } + } + if (Defer) { + // This depends on another glued node, defer it again. + placeNodeInSortedPos(N); + continue; + } + // We are picking this one, so increment SortedPos and check that we point + // to the correct place. + assert(N->getIterator() == SortedPos); + SortedPos++; + --NumPendingGlueNodes; + } + + // All operands of N are guaranteed to be in order at this point. checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. @@ -8095,17 +8158,16 @@ assert(Degree != 0 && "Invalid node degree"); --Degree; if (Degree == 0) { - // All of P's operands are sorted, so P may sorted now. - P->setNodeId(DAGSize++); - if (P->getIterator() != SortedPos) - SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); - assert(SortedPos != AllNodes.end() && "Overran node list"); - ++SortedPos; + placeNodeInSortedPos(P); } else { // Update P's outstanding operand count. P->setNodeId(Degree); } } + + // Now we're finishing it, mark the Id. + N->setNodeId(DAGSize++); + if (Node.getIterator() == SortedPos) { #ifndef NDEBUG allnodes_iterator I(N); Index: llvm/test/CodeGen/AVR/glue-dag-combine-bug.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AVR/glue-dag-combine-bug.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=avr-unknown-unknown + +%"foo" = type { [0 x i8], i16, [0 x i8], [40 x i32], [0 x i8] } + +define void @bar() unnamed_addr addrspace(0) #0 personality i32 (...) addrspace(1)* @rust_eh_personality { +start: + %_7.sroa.0.0..sroa_idx.i = getelementptr inbounds %"foo", %"foo"* undef, i16 0, i32 3, i16 0 + switch i2 undef, label %bb5.i2 [ + i2 0, label %bb2.i + i2 1, label %bb3.i + i2 -2, label %bb4.i + ] + +bb2.i: ; preds = %start + unreachable + +bb3.i: ; preds = %start + unreachable + +bb4.i: ; preds = %start + br i1 undef, label %bb7, label %bb9.i5.i + +bb9.i5.i: ; preds = %bb4.i + %0 = call addrspace(1) { i8, i1 } @llvm.usub.with.overflow.i8(i8 0, i8 48) + %1 = extractvalue { i8, i1 } %0, 0 + %2 = zext i8 %1 to i32 + %3 = load i32, i32* %_7.sroa.0.0..sroa_idx.i, align 1 + %4 = call addrspace(1) { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %3, i32 %2) #3 + %5 = extractvalue { i32, i1 } %4, 0 + store i32 %5, i32* %_7.sroa.0.0..sroa_idx.i, align 1 + unreachable + +bb5.i2: ; preds = %start + unreachable + +bb7: ; preds = %bb4.i + ret void +} + +; Function Attrs: nounwind readnone speculatable +declare { i8, i1 } @llvm.usub.with.overflow.i8(i8, i8) addrspace(1) #1 + +declare i32 @rust_eh_personality(...) unnamed_addr addrspace(1) #2 + +; Function Attrs: nounwind readnone speculatable +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) addrspace(1) #1 + +attributes #0 = { uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { "target-cpu"="atmega32u4" } +attributes #3 = { nounwind }