Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -218,6 +218,12 @@ SMMLAR, // Signed multiply long, round and add SMMLSR, // Signed multiply long, subtract and round + // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b stands for. + QADD8b, + QSUB8b, + QADD16b, + QSUB16b, + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1021,6 +1021,12 @@ setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + if (Subtarget->hasDSP()) { + setOperationAction(ISD::SADDSAT, MVT::i8, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i8, Custom); + setOperationAction(ISD::SADDSAT, MVT::i16, Custom); + setOperationAction(ISD::SSUBSAT, MVT::i16, Custom); + } // i64 operation support. 
setOperationAction(ISD::MUL, MVT::i64, Expand); @@ -1621,6 +1627,10 @@ case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; case ARMISD::SMMLSR: return "ARMISD::SMMLSR"; + case ARMISD::QADD16b: return "ARMISD::QADD16b"; + case ARMISD::QSUB16b: return "ARMISD::QSUB16b"; + case ARMISD::QADD8b: return "ARMISD::QADD8b"; + case ARMISD::QSUB8b: return "ARMISD::QSUB8b"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -4444,6 +4454,35 @@ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + EVT VT = Op.getValueType(); + if (!Subtarget->hasDSP()) + return SDValue(); + if (!VT.isSimple() || !VT.isInteger() || VT.getScalarSizeInBits() > 32) + return SDValue(); + + unsigned NewOpcode; + bool IsAdd = Op->getOpcode() == ISD::SADDSAT; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::i8: + NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b; + break; + case MVT::i16: + NewOpcode = IsAdd ? 
ARMISD::QADD16b : ARMISD::QSUB16b; + break; + } + + SDLoc dl(Op); + SDValue Add = + DAG.getNode(NewOpcode, dl, MVT::i32, + DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32), + DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Add); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -9080,6 +9119,9 @@ case ISD::UADDO: case ISD::USUBO: return LowerUnsignedALUO(Op, DAG); + case ISD::SADDSAT: + case ISD::SSUBSAT: + return LowerSADDSUBSAT(Op, DAG, Subtarget); case ISD::LOAD: return LowerPredicateLoad(Op, DAG); case ISD::STORE: @@ -9164,6 +9206,10 @@ Results.push_back(Res.getValue(0)); Results.push_back(Res.getValue(1)); return; + case ISD::SADDSAT: + case ISD::SSUBSAT: + Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget); + break; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; @@ -14341,7 +14387,9 @@ return SDValue(); break; } - case ARMISD::SMLALBB: { + case ARMISD::SMLALBB: + case ARMISD::QADD16b: + case ARMISD::QSUB16b: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || @@ -14377,6 +14425,15 @@ return SDValue(); break; } + case ARMISD::QADD8b: + case ARMISD::QSUB8b: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ 
-238,6 +238,11 @@ def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; +def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>; +def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; +def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; +def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; + // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -3750,6 +3755,15 @@ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; } +def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (QSUB16 rGPR:$Rm, rGPR:$Rn)>; + def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2395,6 +2395,15 @@ def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn), + (t2QADD8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn), + (t2QSUB8 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn), + (t2QADD16 rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn), + (t2QSUB16 rGPR:$Rm, rGPR:$Rn)>; + // Signed/Unsigned add/subtract def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; Index: llvm/test/CodeGen/ARM/sadd_sat.ll 
=================================================================== --- llvm/test/CodeGen/ARM/sadd_sat.ll +++ llvm/test/CodeGen/ARM/sadd_sat.ll @@ -231,30 +231,30 @@ ; CHECK-T1-NEXT: .LCPI2_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: movw r1, #32767 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it lt -; CHECK-T2-NEXT: movlt r1, r0 -; CHECK-T2-NEXT: movw r0, #32768 -; CHECK-T2-NEXT: cmn.w r1, #32768 -; CHECK-T2-NEXT: movt r0, #65535 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: movw r1, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #32768 +; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 +; CHECK-T2NODSP-NEXT: movt r0, #65535 +; CHECK-T2NODSP-NEXT: it gt +; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 +; CHECK-ARM-NEXT: qadd16 r0, r0, r1 +; CHECK-ARM-NEXT: sxth r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -278,24 +278,27 @@ ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: add r0, r1 -; CHECK-T2-NEXT: cmp r0, #127 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #127 -; CHECK-T2-NEXT: cmn.w r0, #128 -; CHECK-T2-NEXT: it le -; 
CHECK-T2-NEXT: mvnle r0, #127 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: add r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #127 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: add r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 +; CHECK-ARM-NEXT: qadd8 r0, r0, r1 +; CHECK-ARM-NEXT: sxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y) ret i8 %tmp Index: llvm/test/CodeGen/ARM/sadd_sat_plus.ll =================================================================== --- llvm/test/CodeGen/ARM/sadd_sat_plus.ll +++ llvm/test/CodeGen/ARM/sadd_sat_plus.ll @@ -258,29 +258,15 @@ ; CHECK-T2DSP-LABEL: func16: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: sxtah r0, r0, r1 -; CHECK-T2DSP-NEXT: movw r1, #32767 -; CHECK-T2DSP-NEXT: cmp r0, r1 -; CHECK-T2DSP-NEXT: it lt -; CHECK-T2DSP-NEXT: movlt r1, r0 -; CHECK-T2DSP-NEXT: movw r0, #32768 -; CHECK-T2DSP-NEXT: cmn.w r1, #32768 -; CHECK-T2DSP-NEXT: movt r0, #65535 -; CHECK-T2DSP-NEXT: it gt -; CHECK-T2DSP-NEXT: movgt r0, r1 +; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxtah r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: 
movgt r0, r1 +; CHECK-ARM-NEXT: qadd16 r0, r0, r1 +; CHECK-ARM-NEXT: sxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a) @@ -323,23 +309,15 @@ ; CHECK-T2DSP-LABEL: func8: ; CHECK-T2DSP: @ %bb.0: ; CHECK-T2DSP-NEXT: muls r1, r2, r1 -; CHECK-T2DSP-NEXT: sxtab r0, r0, r1 -; CHECK-T2DSP-NEXT: cmp r0, #127 -; CHECK-T2DSP-NEXT: it ge -; CHECK-T2DSP-NEXT: movge r0, #127 -; CHECK-T2DSP-NEXT: cmn.w r0, #128 -; CHECK-T2DSP-NEXT: it le -; CHECK-T2DSP-NEXT: mvnle r0, #127 +; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 ; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxtab r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 +; CHECK-ARM-NEXT: qadd8 r0, r0, r1 +; CHECK-ARM-NEXT: sxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a) Index: llvm/test/CodeGen/ARM/ssub_sat.ll =================================================================== --- llvm/test/CodeGen/ARM/ssub_sat.ll +++ llvm/test/CodeGen/ARM/ssub_sat.ll @@ -233,30 +233,30 @@ ; CHECK-T1-NEXT: .LCPI2_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: movw r1, #32767 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it lt -; CHECK-T2-NEXT: movlt r1, r0 -; CHECK-T2-NEXT: movw r0, #32768 -; CHECK-T2-NEXT: cmn.w r1, #32768 -; CHECK-T2-NEXT: movt r0, #65535 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: movw r1, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #32768 +; CHECK-T2NODSP-NEXT: cmn.w 
r1, #32768 +; CHECK-T2NODSP-NEXT: movt r0, #65535 +; CHECK-T2NODSP-NEXT: it gt +; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 +; CHECK-ARM-NEXT: qsub16 r0, r0, r1 +; CHECK-ARM-NEXT: sxth r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y) ret i16 %tmp @@ -280,24 +280,27 @@ ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: cmp r0, #127 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #127 -; CHECK-T2-NEXT: cmn.w r0, #128 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #127 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #127 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: qsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 +; CHECK-ARM-NEXT: qsub8 r0, r0, r1 +; CHECK-ARM-NEXT: sxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y) ret i8 %tmp Index: 
llvm/test/CodeGen/ARM/ssub_sat_plus.ll =================================================================== --- llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -245,34 +245,34 @@ ; CHECK-T1-NEXT: .LCPI2_1: ; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 ; -; CHECK-T2-LABEL: func16: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: sxth r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: movw r1, #32767 -; CHECK-T2-NEXT: cmp r0, r1 -; CHECK-T2-NEXT: it lt -; CHECK-T2-NEXT: movlt r1, r0 -; CHECK-T2-NEXT: movw r0, #32768 -; CHECK-T2-NEXT: movt r0, #65535 -; CHECK-T2-NEXT: cmn.w r1, #32768 -; CHECK-T2-NEXT: it gt -; CHECK-T2-NEXT: movgt r0, r1 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func16: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: sxth r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: movw r1, #32767 +; CHECK-T2NODSP-NEXT: cmp r0, r1 +; CHECK-T2NODSP-NEXT: it lt +; CHECK-T2NODSP-NEXT: movlt r1, r0 +; CHECK-T2NODSP-NEXT: movw r0, #32768 +; CHECK-T2NODSP-NEXT: movt r0, #65535 +; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 +; CHECK-T2NODSP-NEXT: it gt +; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func16: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: qsub16 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxth r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func16: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxth r1, r1 -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: movw r1, #32767 -; CHECK-ARM-NEXT: cmp r0, r1 -; CHECK-ARM-NEXT: movlt r1, r0 -; CHECK-ARM-NEXT: movw r0, #32768 -; CHECK-ARM-NEXT: movt r0, #65535 -; CHECK-ARM-NEXT: cmn r1, #32768 -; CHECK-ARM-NEXT: movgt r0, r1 +; CHECK-ARM-NEXT: qsub16 r0, r0, r1 +; CHECK-ARM-NEXT: sxth r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i16 %y, %z %tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 
%a) @@ -299,28 +299,31 @@ ; CHECK-T1-NEXT: .LBB3_4: ; CHECK-T1-NEXT: bx lr ; -; CHECK-T2-LABEL: func8: -; CHECK-T2: @ %bb.0: -; CHECK-T2-NEXT: muls r1, r2, r1 -; CHECK-T2-NEXT: sxtb r1, r1 -; CHECK-T2-NEXT: subs r0, r0, r1 -; CHECK-T2-NEXT: cmp r0, #127 -; CHECK-T2-NEXT: it ge -; CHECK-T2-NEXT: movge r0, #127 -; CHECK-T2-NEXT: cmn.w r0, #128 -; CHECK-T2-NEXT: it le -; CHECK-T2-NEXT: mvnle r0, #127 -; CHECK-T2-NEXT: bx lr +; CHECK-T2NODSP-LABEL: func8: +; CHECK-T2NODSP: @ %bb.0: +; CHECK-T2NODSP-NEXT: muls r1, r2, r1 +; CHECK-T2NODSP-NEXT: sxtb r1, r1 +; CHECK-T2NODSP-NEXT: subs r0, r0, r1 +; CHECK-T2NODSP-NEXT: cmp r0, #127 +; CHECK-T2NODSP-NEXT: it ge +; CHECK-T2NODSP-NEXT: movge r0, #127 +; CHECK-T2NODSP-NEXT: cmn.w r0, #128 +; CHECK-T2NODSP-NEXT: it le +; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: bx lr +; +; CHECK-T2DSP-LABEL: func8: +; CHECK-T2DSP: @ %bb.0: +; CHECK-T2DSP-NEXT: muls r1, r2, r1 +; CHECK-T2DSP-NEXT: qsub8 r0, r0, r1 +; CHECK-T2DSP-NEXT: sxtb r0, r0 +; CHECK-T2DSP-NEXT: bx lr ; ; CHECK-ARM-LABEL: func8: ; CHECK-ARM: @ %bb.0: ; CHECK-ARM-NEXT: smulbb r1, r1, r2 -; CHECK-ARM-NEXT: sxtb r1, r1 -; CHECK-ARM-NEXT: sub r0, r0, r1 -; CHECK-ARM-NEXT: cmp r0, #127 -; CHECK-ARM-NEXT: movge r0, #127 -; CHECK-ARM-NEXT: cmn r0, #128 -; CHECK-ARM-NEXT: mvnle r0, #127 +; CHECK-ARM-NEXT: qsub8 r0, r0, r1 +; CHECK-ARM-NEXT: sxtb r0, r0 ; CHECK-ARM-NEXT: bx lr %a = mul i8 %y, %z %tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %a)