Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2464,6 +2464,37 @@ Complexity += 2; } + // Heuristic: try harder to form an LEA from ADD if the operands set flags. + // Unlike ADD, LEA does not affect flags, so we will be less likely to require + // duplicating flag-producing instructions later in the pipeline. + if (N.getOpcode() == ISD::ADD) { + auto isMathWithFlags = [](SDValue V) { + switch (V.getOpcode()) { + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + */ + // Value 1 is the flag output of the node - verify it's not dead. + return !SDValue(V.getNode(), 1).use_empty(); + default: + return false; + } + }; + // TODO: This could be an 'or' rather than 'and' to make the transform more + // likely to happen. We might want to factor in whether there's a + // load folding opportunity for the math op that disappears with LEA. + if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + Complexity++; + } + if (AM.Disp) Complexity++; Index: llvm/test/CodeGen/X86/combine-sbb.ll =================================================================== --- llvm/test/CodeGen/X86/combine-sbb.ll +++ llvm/test/CodeGen/X86/combine-sbb.ll @@ -309,35 +309,25 @@ define i32 @PR40483_sub6(i32*, i32) nounwind { ; X86-LABEL: PR40483_sub6: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: subl %edi, %ecx +; X86-NEXT: movl (%edx), %ecx ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: subl %edi, %esi -; X86-NEXT: movl %esi, (%edx) +; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, (%edx) ; X86-NEXT: jae .LBB8_2 ; X86-NEXT: # %bb.1: -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: leal (%ecx,%ecx), %eax ; X86-NEXT: .LBB8_2: -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi ; X86-NEXT: retl ; ; X64-LABEL: PR40483_sub6: ; X64: # %bb.0: -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: subl %esi, %edx -; X64-NEXT: addl %edx, %edx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: subl %esi, %ecx -; X64-NEXT: movl %ecx, (%rdi) -; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: subl %esi, %eax +; X64-NEXT: movl %eax, (%rdi) +; X64-NEXT: leal (%rax,%rax), %eax +; X64-NEXT: cmovael %ecx, %eax ; X64-NEXT: retq %3 = load i32, i32* %0, align 8 %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)