Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -4472,6 +4472,47 @@ } } + // Optimize {s|u}{add|sub}.with.overflow feeding into an "and 1" and then + // a branch instruction. + auto IsAndOfOptimizableOverflow = [&](const SDValue &LHS, + const SDValue &RHS) { + unsigned Opc = LHS.getOpcode(); + return LHS.getResNo() == 1 && isOneConstant(RHS) && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO); + }; + if (LHS.getOpcode() == ISD::AND && + (isOneConstant(RHS) || isNullConstant(RHS)) && + IsAndOfOptimizableOverflow(LHS.getOperand(0), LHS.getOperand(1))) { + SDValue OverflowOp = LHS.getOperand(0); + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && + "Unexpected condition code."); + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(OverflowOp->getValueType(0))) + return SDValue(); + + // The actual operation with overflow check. + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = + getARMXALUOOp(OverflowOp.getValue(0), DAG, ARMcc); + + auto InvertCondCode = [&DAG](const SDValue &ARMcc) { + ARMCC::CondCodes CondCode = + (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + CondCode = ARMCC::getOppositeCondition(CondCode); + return DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); + }; + if (CC == ISD::SETNE) + ARMcc = InvertCondCode(ARMcc); + if (isOneConstant(RHS)) + ARMcc = InvertCondCode(ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + OverflowCmp); + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); Index: test/CodeGen/ARM/su_addsub_overflow.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/su_addsub_overflow.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mcpu=generic | FileCheck %s + +define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: sadd: +; CHECK: mov r[[R0:[0-9]+]], r0 +; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1 +; CHECK-NEXT: cmp r[[R1]], r[[R0]] +; CHECK-NEXT: movvc pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: uadd: +; CHECK: mov r[[R0:[0-9]+]], r0 +; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1 +; CHECK-NEXT: cmp r[[R1]], r[[R0]] +; CHECK-NEXT: movhs pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: ssub: +; CHECK: cmp r0, r1 +; CHECK-NEXT: subvc r0, r0, r1 +; CHECK-NEXT: movvc pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: usub: +; CHECK: cmp r0, r1 +; CHECK-NEXT: subhs r0, r0, r1 +; CHECK-NEXT: movhs pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +declare void @llvm.trap() #2 +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1