diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -279,6 +279,10 @@
     QSUB8b,
     QADD16b,
     QSUB16b,
+    UQADD8b,  // Lowered from i8 ISD::UADDSAT.
+    UQSUB8b,  // Lowered from i8 ISD::USUBSAT.
+    UQADD16b, // Lowered from i16 ISD::UADDSAT.
+    UQSUB16b, // Lowered from i16 ISD::USUBSAT.
 
     // Operands of the standard BUILD_VECTOR node are not legalized, which
     // is fine if BUILD_VECTORs are always lowered to shuffles or other
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1115,6 +1115,10 @@
     setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
     setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
     setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
+    setOperationAction(ISD::UADDSAT, MVT::i8, Custom); // -> LowerUADDSUBSAT
+    setOperationAction(ISD::USUBSAT, MVT::i8, Custom); // -> LowerUADDSUBSAT
+    setOperationAction(ISD::UADDSAT, MVT::i16, Custom); // -> LowerUADDSUBSAT
+    setOperationAction(ISD::USUBSAT, MVT::i16, Custom); // -> LowerUADDSUBSAT
   }
   if (Subtarget->hasBaseDSP()) {
     setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
@@ -1776,6 +1780,10 @@
     MAKE_CASE(ARMISD::QSUB16b)
     MAKE_CASE(ARMISD::QADD8b)
     MAKE_CASE(ARMISD::QSUB8b)
+    MAKE_CASE(ARMISD::UQADD16b)
+    MAKE_CASE(ARMISD::UQSUB16b)
+    MAKE_CASE(ARMISD::UQADD8b)
+    MAKE_CASE(ARMISD::UQSUB8b)
     MAKE_CASE(ARMISD::BUILD_VECTOR)
     MAKE_CASE(ARMISD::BFI)
     MAKE_CASE(ARMISD::VORRIMM)
@@ -4977,6 +4985,35 @@
   return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
 }
 
+static SDValue LowerUADDSUBSAT(SDValue Op, SelectionDAG &DAG,
+                               const ARMSubtarget *Subtarget) {
+  EVT VT = Op.getValueType(); // i8/i16 here map to UQADD*/UQSUB* nodes below.
+  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+    return SDValue(); // Requires both v6 ops and DSP.
+  if (!VT.isSimple())
+    return SDValue(); // Non-simple value types are not handled.
+
+  unsigned NewOpcode; // Set by the switch; other types return early.
+  bool IsAdd = Op->getOpcode() == ISD::UADDSAT; // Else treated as subtract.
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return SDValue(); // Only i8 and i16 are lowered here.
+  case MVT::i8:
+    NewOpcode = IsAdd ? ARMISD::UQADD8b : ARMISD::UQSUB8b;
+    break;
+  case MVT::i16:
+    NewOpcode = IsAdd ? ARMISD::UQADD16b : ARMISD::UQSUB16b;
+    break;
+  }
+
+  SDLoc dl(Op);
+  SDValue Add = // Despite the name, also holds the UQSUB* node.
+      DAG.getNode(NewOpcode, dl, MVT::i32, // Node is i32; operands widened.
+                  DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
+                  DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
+  return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); // Narrow back to VT.
+}
+
 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cond = Op.getOperand(0);
   SDValue SelectTrue = Op.getOperand(1);
@@ -10130,6 +10167,9 @@
   case ISD::SADDSAT:
   case ISD::SSUBSAT:
     return LowerSADDSUBSAT(Op, DAG, Subtarget);
+  case ISD::UADDSAT:
+  case ISD::USUBSAT:
+    return LowerUADDSUBSAT(Op, DAG, Subtarget);
   case ISD::LOAD:
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
@@ -10231,6 +10271,10 @@
   case ISD::SSUBSAT:
     Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
     break;
+  case ISD::UADDSAT:
+  case ISD::USUBSAT:
+    Res = LowerUADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
+    break;
   case ISD::READCYCLECOUNTER:
     ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
     return;
@@ -17455,7 +17499,9 @@
   }
   case ARMISD::SMLALBB:
   case ARMISD::QADD16b:
-  case ARMISD::QSUB16b: {
+  case ARMISD::QSUB16b:
+  case ARMISD::UQADD16b: // Joins the low-16-bit demanded-bits combine.
+  case ARMISD::UQSUB16b: {
     unsigned BitWidth = N->getValueType(0).getSizeInBits();
     APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
     if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
@@ -17492,7 +17538,9 @@
     break;
   }
   case ARMISD::QADD8b:
-  case ARMISD::QSUB8b: {
+  case ARMISD::QSUB8b:
+  case ARMISD::UQADD8b: // Joins the low-8-bit demanded-bits combine.
+  case ARMISD::UQSUB8b: {
     unsigned BitWidth = N->getValueType(0).getSizeInBits();
     APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
     if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -240,6 +240,11 @@
 def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
 def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
 
+def ARMuqadd8b : SDNode<"ARMISD::UQADD8b", SDT_ARMAnd, []>; // from i8 UADDSAT
+def ARMuqsub8b : SDNode<"ARMISD::UQSUB8b", SDT_ARMAnd, []>; // from i8 USUBSAT
+def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>; // from i16 UADDSAT
+def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>; // from i16 USUBSAT
+
 def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
@@ -3945,6 +3950,7 @@
                  (QDADD rGPR:$Rm, rGPR:$Rn)>;
 def : ARMV5TEPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
                  (QDSUB rGPR:$Rm, rGPR:$Rn)>;
+
 def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
                (QADD8 rGPR:$Rm, rGPR:$Rn)>;
 def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
@@ -3963,6 +3969,16 @@
 def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>;
 def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>;
 
+def : ARMV6Pat<(ARMuqadd8b rGPR:$Rm, rGPR:$Rn), // ARMuq* nodes -> UQ* instrs.
+               (UQADD8 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMuqsub8b rGPR:$Rm, rGPR:$Rn),
+               (UQSUB8 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMuqadd16b rGPR:$Rm, rGPR:$Rn),
+               (UQADD16 rGPR:$Rm, rGPR:$Rn)>;
+def : ARMV6Pat<(ARMuqsub16b rGPR:$Rm, rGPR:$Rn),
+               (UQSUB16 rGPR:$Rm, rGPR:$Rn)>;
+
+
 // Signed/Unsigned add/subtract
 
 def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -2521,6 +2521,7 @@
                    (t2QDADD rGPR:$Rm, rGPR:$Rn)>;
 def : Thumb2DSPPat<(ssubsat rGPR:$Rm, (saddsat rGPR:$Rn, rGPR:$Rn)),
                    (t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
+
 def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
                    (t2QADD8 rGPR:$Rm, rGPR:$Rn)>;
 def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
@@ -2530,6 +2531,15 @@
 def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
                    (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>;
 
+def : Thumb2DSPPat<(ARMuqadd8b rGPR:$Rm, rGPR:$Rn), // ARMuq* -> t2UQ* instrs.
+                   (t2UQADD8 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqsub8b rGPR:$Rm, rGPR:$Rn),
+                   (t2UQSUB8 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqadd16b rGPR:$Rm, rGPR:$Rn),
+                   (t2UQADD16 rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ARMuqsub16b rGPR:$Rm, rGPR:$Rn),
+                   (t2UQSUB16 rGPR:$Rm, rGPR:$Rn)>;
+
 // Signed/Unsigned add/subtract
 
 def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>;
diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll
--- a/llvm/test/CodeGen/ARM/uadd_sat.ll
+++ b/llvm/test/CodeGen/ARM/uadd_sat.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1
-; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2
-; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP
+; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP
 ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM
 
 declare i4 @llvm.uadd.sat.i4(i4, i4)
@@ -106,21 +106,25 @@
 ; CHECK-T1-NEXT: .LCPI2_0:
 ; CHECK-T1-NEXT: .long 65535 @ 0xffff
 ;
-; CHECK-T2-LABEL: func16:
-; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: add r1, r0
-; CHECK-T2-NEXT: movw r0, #65535
-; CHECK-T2-NEXT: cmp r1, r0
-; CHECK-T2-NEXT: it lo
-; CHECK-T2-NEXT: movlo r0, r1
-; CHECK-T2-NEXT: bx lr
+; CHECK-T2NODSP-LABEL: func16:
+; CHECK-T2NODSP: @ %bb.0:
+; CHECK-T2NODSP-NEXT: add r1, r0
+; CHECK-T2NODSP-NEXT: movw r0, #65535
+; CHECK-T2NODSP-NEXT: cmp r1, r0
+; CHECK-T2NODSP-NEXT: it lo
+; CHECK-T2NODSP-NEXT: movlo r0, r1
+; CHECK-T2NODSP-NEXT: bx lr
+;
+; CHECK-T2DSP-LABEL: func16:
+; CHECK-T2DSP: @ %bb.0:
+; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r1
+; CHECK-T2DSP-NEXT: uxth r0, r0
+; CHECK-T2DSP-NEXT: bx lr
 ;
 ; CHECK-ARM-LABEL: func16:
 ; CHECK-ARM: @ %bb.0:
-;
CHECK-ARM-NEXT: add r1, r0, r1 -; CHECK-ARM-NEXT: movw r0, #65535 -; CHECK-ARM-NEXT: cmp r1, r0 -; CHECK-ARM-NEXT: movlo r0, r1 +; CHECK-ARM-NEXT: uqadd16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -137,19 +141,24 @@ ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: cmp r0, #255 -; CHECK-T2-NEXT: it hs -; CHECK-T2-NEXT: movhs r0, #255 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #255 +; CHECK-T2NODSP-NEXT: it hs +; CHECK-T2NODSP-NEXT: movhs r0, #255 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #255 -; CHECK-ARM-NEXT: movhs r0, #255 +; CHECK-ARM-NEXT: uqadd8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y) ret i8 %tmp diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll --- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll @@ -130,20 +130,15 @@ ; CHECK-T2DSP-LABEL: func16: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: uxtah r1, r0, r1 -; CHECK-T2DSP-NEXT: movw r0, #65535 -; CHECK-T2DSP-NEXT: cmp r1, r0 -; CHECK-T2DSP-NEXT: it lo -; CHECK-T2DSP-NEXT: movlo r0, r1 +; CHECK-T2DSP-NEXT: uqadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: mul r1, r1, r2 -; CHECK-ARM-NEXT: uxtah r1, r0, r1 -; CHECK-ARM-NEXT: movw r0, #65535 -; CHECK-ARM-NEXT: cmp r1, r0 -; CHECK-ARM-NEXT: movlo 
r0, r1 +; CHECK-ARM-NEXT: uqadd16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a) @@ -176,18 +171,15 @@ ; CHECK-T2DSP-LABEL: func8: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: uxtab r0, r0, r1 -; CHECK-T2DSP-NEXT: cmp r0, #255 -; CHECK-T2DSP-NEXT: it hs -; CHECK-T2DSP-NEXT: movhs r0, #255 +; CHECK-T2DSP-NEXT: uqadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: uxtab r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #255 -; CHECK-ARM-NEXT: movhs r0, #255 +; CHECK-ARM-NEXT: uqadd8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a) diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 -; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 +; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP +; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM declare i4 @llvm.usub.sat.i4(i4, i4) @@ -100,17 +100,23 @@ ; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ 
%bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -126,17 +132,23 @@ ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y) ret i8 %tmp diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll --- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-T1 -; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 +; RUN: llc < %s -mtriple=thumbv7m-none-eabi | 
FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2NODSP +; RUN: llc < %s -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK-T2DSP ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM declare i4 @llvm.usub.sat.i4(i4, i4) @@ -112,21 +112,27 @@ ; CHECK-T1-NEXT: .LBB2_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: uxth r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: uxth r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: uqsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: mul r1, r1, r2 -; CHECK-ARM-NEXT: uxth r1, r1 -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub16 r0, r0, r1 +; CHECK-ARM-NEXT: uxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a) @@ -145,21 +151,27 @@ ; CHECK-T1-NEXT: .LBB3_2: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: uxtb r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: it lo -; CHECK-T2-NEXT: movlo r0, #0 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: uxtb r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: it lo +; CHECK-T2NODSP-NEXT: movlo r0, #0 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: 
+; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: uqsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: uxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: uxtb r1, r1 -; CHECK-ARM-NEXT: subs r0, r0, r1 -; CHECK-ARM-NEXT: movlo r0, #0 +; CHECK-ARM-NEXT: uqsub8 r0, r0, r1 +; CHECK-ARM-NEXT: uxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)