Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -152,6 +152,12 @@ /// which have not yet been combined to the worklist. SmallPtrSet CombinedNodes; + /// Set of nodes which have tried tomatch a deep pattern. + /// + /// This is used to ensure the nodes gets processed again in case the + /// DAG is modified. + SmallPtrSet DeepPatternNodes; + // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; @@ -236,6 +242,7 @@ void removeFromWorklist(SDNode *N) { CombinedNodes.erase(N); PruningList.remove(N); + DeepPatternNodes.erase(N); auto It = WorklistMap.find(N); if (It == WorklistMap.end()) @@ -1207,6 +1214,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + KnownBits Known; if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) return false; @@ -1574,77 +1582,89 @@ // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // While we have a valid worklist entry node, try to combine it. - while (SDNode *N = getNextWorklistEntry()) { - // If N has no uses, it is dead. Make sure to revisit all N's operands once - // N is deleted from the DAG, since they too may now be dead or may have a - // reduced number of uses, allowing other xforms. - if (recursivelyDeleteUnusedNodes(N)) - continue; + for (unsigned Iteration = 0; Iteration < 3; Iteration++) { + bool Changed = false; - WorklistRemover DeadNodes(*this); + // While we have a valid worklist entry node, try to combine it. + while (SDNode *N = getNextWorklistEntry()) { + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (recursivelyDeleteUnusedNodes(N)) + continue; - // If this combine is running after legalizing the DAG, re-legalize any - // nodes pulled off the worklist. - if (Level == AfterLegalizeDAG) { - SmallSetVector UpdatedNodes; - bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + WorklistRemover DeadNodes(*this); + + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); - for (SDNode *LN : UpdatedNodes) { - AddToWorklist(LN); - AddUsersToWorklist(LN); + for (SDNode *LN : UpdatedNodes) { + AddToWorklist(LN); + AddUsersToWorklist(LN); + } + if (!NIsValid) + continue; } - if (!NIsValid) - continue; - } - LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + + // Add any operands of the new node which have not yet been combined to + // the worklist as well. Because the worklist uniques things already, + // this won't repeatedly process the same operand. + CombinedNodes.insert(N); + for (const SDValue &ChildN : N->op_values()) + if (!CombinedNodes.count(ChildN.getNode())) + AddToWorklist(ChildN.getNode()); - // Add any operands of the new node which have not yet been combined to the - // worklist as well. Because the worklist uniques things already, this - // won't repeatedly process the same operand. - CombinedNodes.insert(N); - for (const SDValue &ChildN : N->op_values()) - if (!CombinedNodes.count(ChildN.getNode())) - AddToWorklist(ChildN.getNode()); + SDValue RV = combine(N); - SDValue RV = combine(N); + if (!RV.getNode()) + continue; - if (!RV.getNode()) - continue; + ++NodesCombined; + Changed = true; - ++NodesCombined; + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. + if (RV.getNode() == N) + continue; - // If we get back the same node we passed in, rather than a new node or - // zero, we know that the node must have defined multiple values and - // CombineTo was used. Since CombineTo takes care of the worklist - // mechanics for us, we have no work to do in this case. - if (RV.getNode() == N) - continue; + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); - assert(N->getOpcode() != ISD::DELETED_NODE && - RV.getOpcode() != ISD::DELETED_NODE && - "Node was deleted but visit returned new node!"); + LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode()); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + DAG.ReplaceAllUsesWith(N, &RV); + } - LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + // Push the new node and any users onto the worklist + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); - if (N->getNumValues() == RV.getNode()->getNumValues()) - DAG.ReplaceAllUsesWith(N, RV.getNode()); - else { - assert(N->getValueType(0) == RV.getValueType() && - N->getNumValues() == 1 && "Type mismatch"); - DAG.ReplaceAllUsesWith(N, &RV); + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. This will also take care of adding any + // operands which have lost a user to the worklist. + recursivelyDeleteUnusedNodes(N); } - // Push the new node and any users onto the worklist - AddToWorklist(RV.getNode()); - AddUsersToWorklist(RV.getNode()); + if (!Changed) + break; - // Finally, if the node is now dead, remove it from the graph. The node - // may not be dead if the replacement process recursively simplified to - // something else needing this node. This will also take care of adding any - // operands which have lost a user to the worklist. - recursivelyDeleteUnusedNodes(N); + // Make sure we process the nodes for which a new combine may exist. + for (SDNode *N : DeepPatternNodes) + AddToWorklist(N); } // If the root changed (e.g. it was a dead load, update the root). @@ -2972,6 +2992,8 @@ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); + DeepPatternNodes.insert(N); + /** * When one of the addcarry argument is itself a carry, we may be facing * a diamond carry propagation. In which case we try to transform the DAG Index: test/CodeGen/X86/addcarry.ll =================================================================== --- test/CodeGen/X86/addcarry.ll +++ test/CodeGen/X86/addcarry.ll @@ -312,23 +312,31 @@ define %S @readd(%S* nocapture readonly %this, %S %arg.b) { ; CHECK-LABEL: readd: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: addq (%rsi), %rdx -; CHECK-NEXT: movq 8(%rsi), %r11 -; CHECK-NEXT: adcq $0, %r11 -; CHECK-NEXT: setb %r10b -; CHECK-NEXT: movzbl %r10b, %edi -; CHECK-NEXT: addq %rcx, %r11 -; CHECK-NEXT: adcq 16(%rsi), %rdi +; CHECK-NEXT: movq (%rsi), %r10 +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: addq %r10, %rdi +; CHECK-NEXT: movq 8(%rsi), %rdi +; CHECK-NEXT: movq 16(%rsi), %r11 +; CHECK-NEXT: movq %rcx, %rbx +; CHECK-NEXT: adcq %rdi, %rbx +; CHECK-NEXT: addq %r10, %rdx +; CHECK-NEXT: adcq %rdi, %rcx ; CHECK-NEXT: setb %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: addq %r8, %rdi -; CHECK-NEXT: adcq 24(%rsi), %rcx -; CHECK-NEXT: addq %r9, %rcx +; CHECK-NEXT: movq %r8, %rdi +; CHECK-NEXT: adcq %r11, %rdi +; CHECK-NEXT: addb $255, %cl +; CHECK-NEXT: adcq %r11, %r8 +; CHECK-NEXT: adcq 24(%rsi), %r9 ; CHECK-NEXT: movq %rdx, (%rax) -; CHECK-NEXT: movq %r11, 8(%rax) +; CHECK-NEXT: movq %rbx, 8(%rax) ; CHECK-NEXT: movq %rdi, 16(%rax) -; CHECK-NEXT: movq %rcx, 24(%rax) +; CHECK-NEXT: movq %r9, 24(%rax) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 @@ -390,17 +398,18 @@ define i128 @addcarry_to_subcarry(i64 %a, i64 %b) { ; CHECK-LABEL: addcarry_to_subcarry: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: notq %rsi -; CHECK-NEXT: movb $1, %cl -; CHECK-NEXT: addb $-1, %cl -; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: adcq %rsi, %rcx -; CHECK-NEXT: adcq $0, %rax +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: addb $-1, %al +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: adcq %rsi, %rax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: adcq %rsi, %rax +; CHECK-NEXT: addb $255, %cl +; CHECK-NEXT: adcq %rdi, %rsi ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %edx -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: adcq $0, %rdx ; CHECK-NEXT: retq %notb = xor i64 %b, -1 %notb128 = zext i64 %notb to i128 Index: test/CodeGen/X86/subcarry.ll =================================================================== --- test/CodeGen/X86/subcarry.ll +++ test/CodeGen/X86/subcarry.ll @@ -90,36 +90,43 @@ define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %r14, -16 ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: notq %rdi ; CHECK-NEXT: movq (%rsi), %r10 -; CHECK-NEXT: movq 8(%rsi), %rdi -; CHECK-NEXT: movq %r10, %r11 -; CHECK-NEXT: subq %rdx, %r11 -; CHECK-NEXT: notq %rdx +; CHECK-NEXT: movq 8(%rsi), %r11 ; CHECK-NEXT: movb $1, %bl ; CHECK-NEXT: addb $-1, %bl -; CHECK-NEXT: adcq %r10, %rdx -; CHECK-NEXT: adcq $0, %rdi -; CHECK-NEXT: setb %dl -; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: adcq %r10, %rdi ; CHECK-NEXT: notq %rcx -; CHECK-NEXT: addq %rdi, %rcx -; CHECK-NEXT: adcq 16(%rsi), %rdx ; CHECK-NEXT: setb %bl -; CHECK-NEXT: movzbl %bl, %edi +; CHECK-NEXT: movq %rcx, %rdi +; CHECK-NEXT: adcq %r11, %rdi +; CHECK-NEXT: movq 16(%rsi), %r14 +; CHECK-NEXT: addb $255, %bl +; CHECK-NEXT: adcq %r11, %rcx ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %rdx, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rdi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movq %r8, %rbx +; CHECK-NEXT: adcq %r14, %rbx +; CHECK-NEXT: addb $255, %cl +; CHECK-NEXT: adcq %r14, %r8 ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rdi, %r9 -; CHECK-NEXT: movq %r11, (%rax) -; CHECK-NEXT: movq %rcx, 8(%rax) -; CHECK-NEXT: movq %r8, 16(%rax) +; CHECK-NEXT: adcq 24(%rsi), %r9 +; CHECK-NEXT: subq %rdx, %r10 +; CHECK-NEXT: movq %r10, (%rax) +; CHECK-NEXT: movq %rdi, 8(%rax) +; CHECK-NEXT: movq %rbx, 16(%rax) ; CHECK-NEXT: movq %r9, 24(%rax) ; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %r14 ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: