DAGCombiner: fold  select (setcc ugt X, ~C), -1, (add X, C)  into a UADDO node plus a
select on its overflow result (pre-legal-operations only, when UADDO is legal or custom
for the type). Updates the AArch64 and X86 sat-add.ll CHECK lines accordingly.

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7309,25 +7309,44 @@
 
   // fold selects based on a setcc into other things, such as min/max/abs
   if (N0.getOpcode() == ISD::SETCC) {
+    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
+    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
     // select x, y (fcmp lt x, y) -> fminnum x, y
     // select x, y (fcmp gt x, y) -> fmaxnum x, y
     //
     // This is OK if we don't care about what happens if either operand is a
     // NaN.
     //
-    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2)) {
-      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
-
-      if (SDValue FMinMax = combineMinNumMaxNum(
-              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
+                                                CC, TLI, DAG))
         return FMinMax;
+
+    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
+    // This is conservatively limited to pre-legal-operations to give targets
+    // a chance to reverse the transform if they want to do that. Also, it is
+    // unlikely that the pattern would be formed late, so it's probably not
+    // worth going through the other checks.
+    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
+        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
+        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
+      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
+      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
+      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
+        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
+        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
+        SDVTList VTs = DAG.getVTList(VT, VT0);
+        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
+        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
+      }
     }
 
     if ((!LegalOperations &&
          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
         TLI.isOperationLegal(ISD::SELECT_CC, VT))
-      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
-                         N0.getOperand(1), N1, N2, N0.getOperand(2));
+      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
+                         N0.getOperand(2));
     return SimplifySelect(DL, N0, N1, N2);
   }
 
Index: test/CodeGen/AArch64/sat-add.ll
===================================================================
--- test/CodeGen/AArch64/sat-add.ll
+++ test/CodeGen/AArch64/sat-add.ll
@@ -123,9 +123,8 @@
 define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, #42 // =42
-; CHECK-NEXT:    cmn w0, #43 // =43
-; CHECK-NEXT:    csinv w0, w8, wzr, ls
+; CHECK-NEXT:    adds w8, w0, #42 // =42
+; CHECK-NEXT:    csinv w0, w8, wzr, lo
 ; CHECK-NEXT:    ret
   %a = add i32 %x, 42
   %c = icmp ugt i32 %x, -43
@@ -162,9 +161,8 @@
 define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #42 // =42
-; CHECK-NEXT:    cmn x0, #43 // =43
-; CHECK-NEXT:    csinv x0, x8, xzr, ls
+; CHECK-NEXT:    adds x8, x0, #42 // =42
+; CHECK-NEXT:    csinv x0, x8, xzr, lo
 ; CHECK-NEXT:    ret
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, -43
Index: test/CodeGen/X86/sat-add.ll
===================================================================
--- test/CodeGen/X86/sat-add.ll
+++ test/CodeGen/X86/sat-add.ll
@@ -43,11 +43,10 @@
 define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
 ; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    cmpb $-43, %dil
+; ANY-NEXT:    addb $42, %dil
 ; ANY-NEXT:    movb $-1, %al
-; ANY-NEXT:    ja .LBB2_2
+; ANY-NEXT:    jb .LBB2_2
 ; ANY-NEXT:  # %bb.1:
-; ANY-NEXT:    addb $42, %dil
 ; ANY-NEXT:    movl %edi, %eax
 ; ANY-NEXT:  .LBB2_2:
 ; ANY-NEXT:    retq
@@ -90,12 +89,9 @@
 define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
 ; ANY-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal 42(%rdi), %ecx
-; ANY-NEXT:    movzwl %di, %eax
-; ANY-NEXT:    cmpl $65493, %eax # imm = 0xFFD5
+; ANY-NEXT:    addw $42, %di
 ; ANY-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %edi, %eax
 ; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
 ; ANY-NEXT:    retq
   %a = add i16 %x, 42
@@ -134,11 +130,9 @@
 define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
 ; ANY-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    # kill: def $edi killed $edi def $rdi
-; ANY-NEXT:    leal 42(%rdi), %ecx
-; ANY-NEXT:    cmpl $-43, %edi
+; ANY-NEXT:    addl $42, %edi
 ; ANY-NEXT:    movl $-1, %eax
-; ANY-NEXT:    cmovbel %ecx, %eax
+; ANY-NEXT:    cmovael %edi, %eax
 ; ANY-NEXT:    retq
   %a = add i32 %x, 42
   %c = icmp ugt i32 %x, -43
@@ -176,10 +170,9 @@
 define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
 ; ANY-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
 ; ANY:       # %bb.0:
-; ANY-NEXT:    cmpq $-43, %rdi
-; ANY-NEXT:    leaq 42(%rdi), %rax
-; ANY-NEXT:    movq $-1, %rcx
-; ANY-NEXT:    cmovaq %rcx, %rax
+; ANY-NEXT:    addq $42, %rdi
+; ANY-NEXT:    movq $-1, %rax
+; ANY-NEXT:    cmovaeq %rdi, %rax
 ; ANY-NEXT:    retq
   %a = add i64 %x, 42
   %c = icmp ugt i64 %x, -43