Index: include/llvm/IR/IntrinsicsARM.td =================================================================== --- include/llvm/IR/IntrinsicsARM.td +++ include/llvm/IR/IntrinsicsARM.td @@ -22,18 +22,184 @@ // and return value are essentially chains, used to force ordering during ISel. def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// 16-bit multiplications +def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Saturating Arithmetic def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + [Commutative]>; def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +// Accumulating multiplications +def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">, + 
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +// Parallel 16-bit saturation + +def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +// Packing and unpacking + +def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +// Parallel selection + +def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; + +// Parallel 8-bit addition and subtraction + +def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_sadd8 : 
GCCBuiltin<"__builtin_arm_sadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +// Sum of 8-bit absolute differences +def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +// Parallel 16-bit addition and subtraction +def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">, + 
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">, + Intrinsic<[llvm_i32_ty], 
[llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +// Parallel 16-bit multiplication +def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, 
llvm_i32_ty], [IntrNoMem]>; +def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Load, Store and Clear exclusive Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -184,6 +184,10 @@ SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 SMLALTT, // 64-bit signed accumulate multiply top, top 16 + SMLALD, // Signed multiply accumulate long dual + SMLALDX, // Signed multiply accumulate long dual exchange + SMLSLD, // Signed multiply subtract long dual + SMLSLDX, // Signed multiply subtract long dual exchange // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -821,6 +821,7 @@ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::ADDC, MVT::i32, Custom); setOperationAction(ISD::ADDE, MVT::i32, Custom); @@ -1343,6 +1344,10 @@ case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; case ARMISD::SMULWB: return "ARMISD::SMULWB"; case ARMISD::SMULWT: return "ARMISD::SMULWT"; + case ARMISD::SMLALD: return "ARMISD::SMLALD"; + case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; + case ARMISD::SMLSLD: return "ARMISD::SMLSLD"; + case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case 
ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -3353,6 +3358,31 @@ case Intrinsic::arm_neon_vtbl2: return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::arm_smlald: + case Intrinsic::arm_smlaldx: + case Intrinsic::arm_smlsld: + case Intrinsic::arm_smlsldx: { + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Op.getOperand(3), + DAG.getConstant(0, dl, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + Op.getOperand(3), + DAG.getConstant(1, dl, MVT::i32)); + unsigned Opc = 0; + if (IntNo == Intrinsic::arm_smlald) + Opc = ARMISD::SMLALD; + else if (IntNo == Intrinsic::arm_smlaldx) + Opc = ARMISD::SMLALDX; + else if (IntNo == Intrinsic::arm_smlsld) + Opc = ARMISD::SMLSLD; + else + Opc = ARMISD::SMLSLDX; + + return DAG.getNode(Opc, dl, + DAG.getVTList(MVT::i32, MVT::i32), + Op.getOperand(1), Op.getOperand(2), + Lo, Hi); + } } } @@ -7758,6 +7788,11 @@ case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_64Results(N, Results, DAG); return; + case ISD::INTRINSIC_WO_CHAIN: + Res = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG, Subtarget); + Results.push_back(Res.getValue(0)); + Results.push_back(Res.getValue(1)); + return; } if (Res.getNode()) Results.push_back(Res); Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -99,6 +99,11 @@ SDTCisSameAs<0, 4>, SDTCisSameAs<0, 5>]>; +def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; +def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; +def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; +def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; + // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -866,7 +871,10 @@ MVT::i32); }]>; def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } -def imm1_16 : Operand, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], +def imm1_16 : Operand, PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + return Imm > 0 && Imm <= 16; + }], imm1_16_XFORM> { let PrintMethod = "printImmPlusOneOperand"; let ParserMatchClass = Imm1_16AsmOperand; @@ -1979,7 +1987,9 @@ def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", - "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3468,8 +3478,12 @@ (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def : Pat<(int_arm_sxtb16 GPR:$Src), + (SXTB16 GPR:$Src, 0)>; def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def : Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS), + (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>; // Zero extenders @@ -3489,6 +3503,8 @@ // (UXTB16r_rot GPR:$Src, 3)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16 GPR:$Src, 1)>; +def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src), + (UXTB16 GPR:$Src, 0)>; def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -3503,6 +3519,8 @@ // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. 
def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS), + (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>; def SBFX : I<(outs GPRnopc:$Rd), @@ -3629,71 +3647,85 @@ let Unpredictable{11-8} = 0b1111; } -// Saturating add/subtract +// Wrappers around the AAI class +class AAIRevOpr op27_20, bits<8> op11_4, string opc, + list pattern = []> + : AAI; + +class AAIIntrinsic op27_20, bits<8> op11_4, string opc, + Intrinsic intrinsic> + : AAI; +// Saturating add/subtract +let hasSideEffects = 1 in { +def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>; +def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>; +def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>; +def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>; + +def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd", + [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm, + GPRnopc:$Rm), + GPRnopc:$Rn))]>; +def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, + (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>; +def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>; let DecoderMethod = "DecodeQADDInstruction" in -def QADD : AAI<0b00010000, 0b00000101, "qadd", - [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; - -def QSUB : AAI<0b00010010, 0b00000101, "qsub", - [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; -def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; -def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; - -def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">; -def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">; -def QASX : AAI<0b01100010, 
0b11110011, "qasx">; -def QSAX : AAI<0b01100010, 0b11110101, "qsax">; -def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">; -def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">; -def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">; -def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">; -def UQASX : AAI<0b01100110, 0b11110011, "uqasx">; -def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">; -def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">; -def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">; + def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd", + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; +} + +def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; +def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; +def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; +def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>; +def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>; +def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; +def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; +def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; // Signed/Unsigned add/subtract -def SASX : AAI<0b01100001, 0b11110011, "sasx">; -def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">; -def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">; -def SSAX : AAI<0b01100001, 0b11110101, "ssax">; -def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">; -def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">; -def UASX : AAI<0b01100101, 0b11110011, "uasx">; -def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">; -def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">; -def USAX : AAI<0b01100101, 0b11110101, "usax">; -def USUB16 : AAI<0b01100101, 0b11110111, "usub16">; -def USUB8 : AAI<0b01100101, 0b11111111, "usub8">; +def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>; +def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, 
"sadd16", int_arm_sadd16>; +def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>; +def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>; +def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>; +def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>; +def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>; +def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>; +def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>; +def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>; +def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>; +def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>; // Signed/Unsigned halving add/subtract -def SHASX : AAI<0b01100011, 0b11110011, "shasx">; -def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">; -def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">; -def SHSAX : AAI<0b01100011, 0b11110101, "shsax">; -def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">; -def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">; -def UHASX : AAI<0b01100111, 0b11110011, "uhasx">; -def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">; -def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">; -def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; -def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; -def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; +def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>; +def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>; +def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>; +def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>; +def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>; +def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>; +def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>; 
+def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>; +def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>; +def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>; +def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>; +def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>; // Unsigned Sum of Absolute Differences [and Accumulate]. def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", - "\t$Rd, $Rn, $Rm", []>, + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -3707,7 +3739,8 @@ } def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), MulFrm /* for convenience */, NoItinerary, "usada8", - "\t$Rd, $Rn, $Rm, $Ra", []>, + "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ bits<4> Rd; bits<4> Rn; @@ -3722,7 +3755,7 @@ } // Signed/Unsigned saturate - +let hasSideEffects = 1 in { def SSAT : AI<(outs GPRnopc:$Rd), (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, @@ -3784,6 +3817,7 @@ let Inst{19-16} = sat_imm; let Inst{3-0} = Rn; } +} // hasSideEffects def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos), (SSAT imm1_32:$pos, GPRnopc:$a, 0)>; @@ -3791,6 +3825,10 @@ (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), + (SSAT16 imm1_16:$pos, GPRnopc:$a)>; +def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), + (USAT16 imm0_15:$pos, GPRnopc:$a)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. 
@@ -4216,12 +4254,13 @@ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; } } defm SMUL : AI_smul<"smul">; +let hasSideEffects = 1 in defm SMLA : AI_smla<"smla">; // Halfword multiply accumulate long: SMLAL. @@ -4251,7 +4290,8 @@ // Helper class for AI_smld. class AMulDualIbase - : AI, Requires<[IsARM, HasV6]> { + : AI, + Requires<[IsARM, HasV6]> { bits<4> Rn; bits<4> Rm; let Inst{27-23} = 0b01110; @@ -4301,20 +4341,40 @@ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; - } defm SMLA : AI_smld<0, "smla">; defm SMLS : AI_smld<1, "smls">; +def : Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : Pat<(ARMSmlald 
GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; + multiclass AI_sdml { def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), @@ -4325,9 +4385,19 @@ Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } +let hasSideEffects = 1 in defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; +def : Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm), + (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>; +def : Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>; + //===----------------------------------------------------------------------===// // Division Instructions (ARMv7-A with virtualization extension) // @@ -5644,6 +5714,32 @@ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b), + (SMULTT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b), + (SMULWT GPR:$a, GPR:$b)>; + +def : Pat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : 
Pat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : Pat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : Pat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : Pat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : Pat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; + // Pre-v7 uses MCR for synchronization barriers. def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td +++ lib/Target/ARM/ARMInstrThumb2.td @@ -2026,6 +2026,9 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF), (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm), + (t2UXTB16 rGPR:$Rm, 0)>; + // FIXME: This pattern incorrectly assumes the shl operator is a rotate. 
// The transformation should probably be done as a combiner action // instead so we can include a check for masking back in the upper @@ -2053,6 +2056,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm), + (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; } @@ -2137,10 +2142,9 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; -// Select Bytes -- for disassembly only - def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "sel", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; @@ -2154,9 +2158,7 @@ // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only class T2I_pam op22_20, bits<4> op7_4, string opc, - list pat = [/* For disassembly only; pattern left blank */], - dag iops = (ins rGPR:$Rn, rGPR:$Rm), - string asm = "\t$Rd, $Rn, $Rm"> + list pat, dag iops, string asm> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; @@ -2174,60 +2176,72 @@ let Inst{3-0} = Rm; } -// Saturating add/subtract -- for disassembly only - -def t2QADD : T2I_pam<0b000, 0b1000, "qadd", - [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; -def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; -def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; -def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : 
T2I_pam<0b000, 0b1010, "qsub", - [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; -def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; -def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; -def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; -def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; -def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; -def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; -def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; - -// Signed/Unsigned add/subtract -- for disassembly only - -def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; -def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; -def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; -def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; -def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; -def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; -def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; -def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; -def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; -def t2USAX : T2I_pam<0b110, 0b0100, "usax">; -def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; -def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; - -// Signed/Unsigned halving add/subtract -- for disassembly only - -def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; -def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; -def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; -def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; -def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; -def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; -def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; -def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; -def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; -def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; -def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; -def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; +class T2I_pam_intrinsics op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> + : T2I_pam; + +class T2I_pam_intrinsics_rev op22_20, bits<4> op7_4, string 
opc> + : T2I_pam; + +// Saturating add/subtract +def t2QADD16 : T2I_pam_intrinsics<0b001, 0b0001, "qadd16", int_arm_qadd16>; +def t2QADD8 : T2I_pam_intrinsics<0b000, 0b0001, "qadd8", int_arm_qadd8>; +def t2QASX : T2I_pam_intrinsics<0b010, 0b0001, "qasx", int_arm_qasx>; +def t2UQSUB8 : T2I_pam_intrinsics<0b100, 0b0101, "uqsub8", int_arm_uqsub8>; +def t2QSAX : T2I_pam_intrinsics<0b110, 0b0001, "qsax", int_arm_qsax>; +def t2QSUB16 : T2I_pam_intrinsics<0b101, 0b0001, "qsub16", int_arm_qsub16>; +def t2QSUB8 : T2I_pam_intrinsics<0b100, 0b0001, "qsub8", int_arm_qsub8>; +def t2UQADD16 : T2I_pam_intrinsics<0b001, 0b0101, "uqadd16", int_arm_uqadd16>; +def t2UQADD8 : T2I_pam_intrinsics<0b000, 0b0101, "uqadd8", int_arm_uqadd8>; +def t2UQASX : T2I_pam_intrinsics<0b010, 0b0101, "uqasx", int_arm_uqasx>; +def t2UQSAX : T2I_pam_intrinsics<0b110, 0b0101, "uqsax", int_arm_uqsax>; +def t2UQSUB16 : T2I_pam_intrinsics<0b101, 0b0101, "uqsub16", int_arm_uqsub16>; +def t2QADD : T2I_pam_intrinsics_rev<0b000, 0b1000, "qadd">; +def t2QSUB : T2I_pam_intrinsics_rev<0b000, 0b1010, "qsub">; +def t2QDADD : T2I_pam_intrinsics_rev<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam_intrinsics_rev<0b000, 0b1011, "qdsub">; + +def : Pat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Pat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Pat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Pat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; + +// Signed/Unsigned add/subtract + +def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; +def t2SADD16 : T2I_pam_intrinsics<0b001, 0b0000, "sadd16", int_arm_sadd16>; +def t2SADD8 : T2I_pam_intrinsics<0b000, 0b0000, "sadd8", int_arm_sadd8>; +def t2SSAX : T2I_pam_intrinsics<0b110, 0b0000, "ssax", int_arm_ssax>; +def t2SSUB16 : T2I_pam_intrinsics<0b101, 0b0000, "ssub16", int_arm_ssub16>; +def t2SSUB8 : 
T2I_pam_intrinsics<0b100, 0b0000, "ssub8", int_arm_ssub8>; +def t2UASX : T2I_pam_intrinsics<0b010, 0b0100, "uasx", int_arm_uasx>; +def t2UADD16 : T2I_pam_intrinsics<0b001, 0b0100, "uadd16", int_arm_uadd16>; +def t2UADD8 : T2I_pam_intrinsics<0b000, 0b0100, "uadd8", int_arm_uadd8>; +def t2USAX : T2I_pam_intrinsics<0b110, 0b0100, "usax", int_arm_usax>; +def t2USUB16 : T2I_pam_intrinsics<0b101, 0b0100, "usub16", int_arm_usub16>; +def t2USUB8 : T2I_pam_intrinsics<0b100, 0b0100, "usub8", int_arm_usub8>; + +// Signed/Unsigned halving add/subtract + +def t2SHASX : T2I_pam_intrinsics<0b010, 0b0010, "shasx", int_arm_shasx>; +def t2SHADD16 : T2I_pam_intrinsics<0b001, 0b0010, "shadd16", int_arm_shadd16>; +def t2SHADD8 : T2I_pam_intrinsics<0b000, 0b0010, "shadd8", int_arm_shadd8>; +def t2SHSAX : T2I_pam_intrinsics<0b110, 0b0010, "shsax", int_arm_shsax>; +def t2SHSUB16 : T2I_pam_intrinsics<0b101, 0b0010, "shsub16", int_arm_shsub16>; +def t2SHSUB8 : T2I_pam_intrinsics<0b100, 0b0010, "shsub8", int_arm_shsub8>; +def t2UHASX : T2I_pam_intrinsics<0b010, 0b0110, "uhasx", int_arm_uhasx>; +def t2UHADD16 : T2I_pam_intrinsics<0b001, 0b0110, "uhadd16", int_arm_uhadd16>; +def t2UHADD8 : T2I_pam_intrinsics<0b000, 0b0110, "uhadd8", int_arm_uhadd8>; +def t2UHSAX : T2I_pam_intrinsics<0b110, 0b0110, "uhsax", int_arm_uhsax>; +def t2UHSUB16 : T2I_pam_intrinsics<0b101, 0b0110, "uhsub16", int_arm_uhsub16>; +def t2UHSUB8 : T2I_pam_intrinsics<0b100, 0b0110, "uhsub8", int_arm_uhsub8>; // Helper class for disassembly only // A6.3.16 & A6.3.17 @@ -2255,16 +2269,19 @@ // Unsigned Sum of Absolute Differences [and Accumulate]. 
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (int_arm_usad8 rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, - "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, + "usada8", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (int_arm_usada8 rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. +let hasSideEffects = 1 in class T2SatI : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> { bits<4> Rd; @@ -2313,10 +2330,16 @@ let Inst{4} = 0; } -def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; -def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), + (t2USAT imm0_31:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos), + (t2SSAT16 imm1_16:$pos, GPR:$a)>; +def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos), + (t2USAT16 imm0_15:$pos, GPR:$a)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. 
@@ -2689,6 +2712,18 @@ (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm), + (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm), + (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultb rGPR:$Rn, rGPR:$Rm), + (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultt rGPR:$Rn, rGPR:$Rm), + (t2SMULTT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwb rGPR:$Rn, rGPR:$Rm), + (t2SMULWB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwt rGPR:$Rn, rGPR:$Rm), + (t2SMULWT rGPR:$Rn, rGPR:$Rm)>; class T2FourRegSMLA op22_20, bits<2> op5_4, string opc, list pattern> Index: test/CodeGen/ARM/acle-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/acle-intrinsics.ll @@ -0,0 +1,582 @@ +; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=armv7-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv6t2-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv8m.main-none-none-eabi -mattr=+dsp %s -o - | FileCheck %s + +define i32 @smulbb(i32 %a, i32 %b) { +; CHECK-LABEL: smulbb +; CHECK: smulbb r0, r0, r1 + %tmp = call i32 @llvm.arm.smulbb(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smulbt(i32 %a, i32 %b) { +; CHECK-LABEL: smulbt +; CHECK: smulbt r0, r0, r1 + %tmp = call i32 @llvm.arm.smulbt(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smultb(i32 %a, i32 %b) { +; CHECK-LABEL: smultb +; CHECK: smultb r0, r0, r1 + %tmp = call i32 @llvm.arm.smultb(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smultt(i32 %a, i32 %b) { +; CHECK-LABEL: smultt +; CHECK: smultt r0, r0, r1 + %tmp = call i32 @llvm.arm.smultt(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smulwb(i32 %a, i32 
%b) { +; CHECK-LABEL: smulwb +; CHECK: smulwb r0, r0, r1 + %tmp = call i32 @llvm.arm.smulwb(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smulwt(i32 %a, i32 %b) { +; CHECK-LABEL: smulwt +; CHECK: smulwt r0, r0, r1 + %tmp = call i32 @llvm.arm.smulwt(i32 %a, i32 %b) + ret i32 %tmp +} + +; upper-bound of the immediate argument +define i32 @ssat1(i32 %a) nounwind { +; CHECK-LABEL: ssat1 +; CHECK: ssat r0, #32, r0 + %tmp = call i32 @llvm.arm.ssat(i32 %a, i32 32) + ret i32 %tmp +} + +; lower-bound of the immediate argument +define i32 @ssat2(i32 %a) nounwind { +; CHECK-LABEL: ssat2 +; CHECK: ssat r0, #1, r0 + %tmp = call i32 @llvm.arm.ssat(i32 %a, i32 1) + ret i32 %tmp +} + +; upper-bound of the immediate argument +define i32 @usat1(i32 %a) nounwind { +; CHECK-LABEL: usat1 +; CHECK: usat r0, #31, r0 + %tmp = call i32 @llvm.arm.usat(i32 %a, i32 31) + ret i32 %tmp +} + +; lower-bound of the immediate argument +define i32 @usat2(i32 %a) nounwind { +; CHECK-LABEL: usat2 +; CHECK: usat r0, #0, r0 + %tmp = call i32 @llvm.arm.usat(i32 %a, i32 0) + ret i32 %tmp +} + +define i32 @ssat16 (i32 %a) nounwind { +; CHECK-LABEL: ssat16 +; CHECK: ssat16 r0, #1, r0 +; CHECK: ssat16 r0, #16, r0 + %tmp = call i32 @llvm.arm.ssat16(i32 %a, i32 1) + %tmp2 = call i32 @llvm.arm.ssat16(i32 %tmp, i32 16) + ret i32 %tmp2 +} + +define i32 @usat16(i32 %a) nounwind { +; CHECK-LABEL: usat16 +; CHECK: usat16 r0, #0, r0 +; CHECK: usat16 r1, #15, r0 + %tmp = call i32 @llvm.arm.usat16(i32 %a, i32 0) + %tmp2 = call i32 @llvm.arm.usat16(i32 %tmp, i32 15) + ret i32 %tmp +} + +define i32 @qadd(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qadd +; CHECK: qadd r0, r0, r1 + %tmp = call i32 @llvm.arm.qadd(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsub(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsub +; CHECK: qsub r0, r0, r1 + %tmp = call i32 @llvm.arm.qsub(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qdadd(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qdadd +; CHECK: qdadd r0, r0, r1 + %dbl = call i32 
@llvm.arm.qadd(i32 %a, i32 %a) + %add = call i32 @llvm.arm.qadd(i32 %dbl, i32 %b) + ret i32 %add +} + +define i32 @qdsub(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qdsub +; CHECK: qdsub r0, r0, r1 + %dbl = call i32 @llvm.arm.qadd(i32 %b, i32 %b) + %add = call i32 @llvm.arm.qsub(i32 %a, i32 %dbl) + ret i32 %add +} + +define i32 @acc_mults(i32 %a, i32 %b, i32 %acc) { +; CHECK-LABEL: acc_mults +; CHECK: smlabb r2, r0, r1, r2 +; CHECK: smlabt r2, r0, r1, r2 +; CHECK: smlatb r2, r0, r1, r2 +; CHECK: smlatt r2, r0, r1, r2 +; CHECK: smlawb r2, r0, r1, r2 +; CHECK: smlawt r0, r0, r1, r2 + %acc1 = call i32 @llvm.arm.smlabb(i32 %a, i32 %b, i32 %acc) + %acc2 = call i32 @llvm.arm.smlabt(i32 %a, i32 %b, i32 %acc1) + %acc3 = call i32 @llvm.arm.smlatb(i32 %a, i32 %b, i32 %acc2) + %acc4 = call i32 @llvm.arm.smlatt(i32 %a, i32 %b, i32 %acc3) + %acc5 = call i32 @llvm.arm.smlawb(i32 %a, i32 %b, i32 %acc4) + %acc6 = call i32 @llvm.arm.smlawt(i32 %a, i32 %b, i32 %acc5) + ret i32 %acc6 +} + +define i32 @pack_unpack(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: pack_unpack +; CHECK: sxtab16 r0, r0, r1 +; CHECK: sxtb16 r0, r0 +; CHECK: uxtab16 r0, r1, r0 +; CHECK: uxtb16 r0, r0 + %tmp = call i32 @llvm.arm.sxtab16(i32 %a, i32 %b) + %tmp1 = call i32 @llvm.arm.sxtb16(i32 %tmp) + %tmp2 = call i32 @llvm.arm.uxtab16(i32 %b, i32 %tmp1) + %tmp3 = call i32 @llvm.arm.uxtb16(i32 %tmp2) + ret i32 %tmp3 +} + +define i32 @sel(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sel +; CHECK: sel r0, r0, r1 + %tmp = call i32 @llvm.arm.sel(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qadd8 +; CHECK: qadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.qadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsub8 +; CHECK: qsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.qsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @sadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sadd8 +; CHECK: sadd8 r0, r0, r1 + %tmp = call i32 
@llvm.arm.sadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shadd8 +; CHECK: shadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.shadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shsub8 +; CHECK: shsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.shsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @ssub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: ssub8 +; CHECK: ssub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.ssub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uadd8 +; CHECK: uadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhadd8 +; CHECK: uhadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhsub8 +; CHECK: uhsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqadd8 +; CHECK: uqadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqsub8 +; CHECK: uqsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: usub8 +; CHECK: usub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.usub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usad(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: usad +; CHECK: usad8 r0, r0, r1 +; CHECK: usada8 r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.usad8(i32 %a, i32 %b) + %tmp1 = call i32 @llvm.arm.usada8(i32 %tmp, i32 %b, i32 %c) + ret i32 %tmp1 +} + +define i32 @qadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qadd16 +; CHECK: qadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.qadd16(i32 %a, i32 %b) + ret i32 %tmp +} + 
+define i32 @qasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qasx +; CHECK: qasx r0, r0, r1 + %tmp = call i32 @llvm.arm.qasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsax +; CHECK: qsax r0, r0, r1 + %tmp = call i32 @llvm.arm.qsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsub16 +; CHECK: qsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.qsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @sadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sadd16 +; CHECK: sadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.sadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @sasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sasx +; CHECK: sasx r0, r0, r1 + %tmp = call i32 @llvm.arm.sasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shadd16 +; CHECK: shadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.shadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shasx +; CHECK: shasx r0, r0, r1 + %tmp = call i32 @llvm.arm.shasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shsax +; CHECK: shsax r0, r0, r1 + %tmp = call i32 @llvm.arm.shsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shsub16 +; CHECK: shsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.shsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @ssax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: ssax +; CHECK: ssax r0, r0, r1 + %tmp = call i32 @llvm.arm.ssax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @ssub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: ssub16 +; CHECK: ssub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.ssub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uadd16 +; CHECK: uadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uadd16(i32 %a, i32 %b) + ret i32 %tmp +} + 
+define i32 @uasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uasx +; CHECK: uasx r0, r0, r1 + %tmp = call i32 @llvm.arm.uasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhadd16 +; CHECK: uhadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhasx +; CHECK: uhasx r0, r0, r1 + %tmp = call i32 @llvm.arm.uhasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhsax +; CHECK: uhsax r0, r0, r1 + %tmp = call i32 @llvm.arm.uhsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhsub16 +; CHECK: uhsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqadd16 +; CHECK: uqadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqasx +; CHECK: uqasx r0, r0, r1 + %tmp = call i32 @llvm.arm.uqasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqsax +; CHECK: uqsax r0, r0, r1 + %tmp = call i32 @llvm.arm.uqsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqsub16 +; CHECK: uqsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: usax +; CHECK: usax r0, r0, r1 + %tmp = call i32 @llvm.arm.usax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: usub16 +; CHECK: usub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.usub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smlad(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smlad +; CHECK: smlad r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smlad(i32 %a, i32 %b, 
i32 %c) + ret i32 %tmp +} + +define i32 @smladx(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smladx +; CHECK: smladx r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smladx(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i64 @smlald(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlald +; CHECK: smlald r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlald(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i64 @smlaldx(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlaldx +; CHECK: smlaldx r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlaldx(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i32 @smlsd(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smlsd +; CHECK: smlsd r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smlsd(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i32 @smlsdx(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smlsdx +; CHECK: smlsdx r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smlsdx(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i64 @smlsld(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlsld +; CHECK: smlsld r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlsld(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i64 @smlsldx(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlsldx +; CHECK: smlsldx r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlsldx(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i32 @smuad(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: smuad +; CHECK: smuad r0, r0, r1 + %tmp = call i32 @llvm.arm.smuad(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smuadx(i32 %a, i32 %b) nounwind { +;CHECK-LABEL: smuadx +; CHECK: smuadx r0, r0, r1 + %tmp = call i32 @llvm.arm.smuadx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smusd(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: smusd +; CHECK: smusd r0, r0, r1 + %tmp = call i32 @llvm.arm.smusd(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smusdx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: smusdx +; CHECK: smusdx r0, r0, r1 + %tmp = call i32 @llvm.arm.smusdx(i32 %a, i32 
%b) + ret i32 %tmp +} +declare i32 @llvm.arm.smulbb(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smulbt(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smultb(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smultt(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smulwb(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smulwt(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone +declare i32 @llvm.arm.usat(i32, i32) nounwind readnone +declare i32 @llvm.arm.qadd(i32, i32) nounwind +declare i32 @llvm.arm.qsub(i32, i32) nounwind +declare i32 @llvm.arm.smlabb(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlabt(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlatb(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlatt(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlawb(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlawt(i32, i32, i32) nounwind +declare i32 @llvm.arm.ssat16(i32, i32) nounwind +declare i32 @llvm.arm.usat16(i32, i32) nounwind +declare i32 @llvm.arm.sxtab16(i32, i32) +declare i32 @llvm.arm.sxtb16(i32) +declare i32 @llvm.arm.uxtab16(i32, i32) +declare i32 @llvm.arm.uxtb16(i32) +declare i32 @llvm.arm.sel(i32, i32) nounwind +declare i32 @llvm.arm.qadd8(i32, i32) nounwind +declare i32 @llvm.arm.qsub8(i32, i32) nounwind +declare i32 @llvm.arm.sadd8(i32, i32) nounwind +declare i32 @llvm.arm.shadd8(i32, i32) nounwind +declare i32 @llvm.arm.shsub8(i32, i32) nounwind +declare i32 @llvm.arm.ssub8(i32, i32) nounwind +declare i32 @llvm.arm.uadd8(i32, i32) nounwind +declare i32 @llvm.arm.uhadd8(i32, i32) nounwind +declare i32 @llvm.arm.uhsub8(i32, i32) nounwind +declare i32 @llvm.arm.uqadd8(i32, i32) nounwind +declare i32 @llvm.arm.uqsub8(i32, i32) nounwind +declare i32 @llvm.arm.usub8(i32, i32) nounwind +declare i32 @llvm.arm.usad8(i32, i32) nounwind readnone +declare i32 @llvm.arm.usada8(i32, i32, i32) nounwind readnone +declare i32 @llvm.arm.qadd16(i32, i32) nounwind +declare i32 
@llvm.arm.qasx(i32, i32) nounwind +declare i32 @llvm.arm.qsax(i32, i32) nounwind +declare i32 @llvm.arm.qsub16(i32, i32) nounwind +declare i32 @llvm.arm.sadd16(i32, i32) nounwind +declare i32 @llvm.arm.sasx(i32, i32) nounwind +declare i32 @llvm.arm.shadd16(i32, i32) nounwind +declare i32 @llvm.arm.shasx(i32, i32) nounwind +declare i32 @llvm.arm.shsax(i32, i32) nounwind +declare i32 @llvm.arm.shsub16(i32, i32) nounwind +declare i32 @llvm.arm.ssax(i32, i32) nounwind +declare i32 @llvm.arm.ssub16(i32, i32) nounwind +declare i32 @llvm.arm.uadd16(i32, i32) nounwind +declare i32 @llvm.arm.uasx(i32, i32) nounwind +declare i32 @llvm.arm.usax(i32, i32) nounwind +declare i32 @llvm.arm.uhadd16(i32, i32) nounwind +declare i32 @llvm.arm.uhasx(i32, i32) nounwind +declare i32 @llvm.arm.uhsax(i32, i32) nounwind +declare i32 @llvm.arm.uhsub16(i32, i32) nounwind +declare i32 @llvm.arm.uqadd16(i32, i32) nounwind +declare i32 @llvm.arm.uqasx(i32, i32) nounwind +declare i32 @llvm.arm.uqsax(i32, i32) nounwind +declare i32 @llvm.arm.uqsub16(i32, i32) nounwind +declare i32 @llvm.arm.usub16(i32, i32) nounwind +declare i32 @llvm.arm.smlad(i32, i32, i32) nounwind +declare i32 @llvm.arm.smladx(i32, i32, i32) nounwind +declare i64 @llvm.arm.smlald(i32, i32, i64) nounwind +declare i64 @llvm.arm.smlaldx(i32, i32, i64) nounwind +declare i32 @llvm.arm.smlsd(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlsdx(i32, i32, i32) nounwind +declare i64 @llvm.arm.smlsld(i32, i32, i64) nounwind +declare i64 @llvm.arm.smlsldx(i32, i32, i64) nounwind +declare i32 @llvm.arm.smuad(i32, i32) nounwind +declare i32 @llvm.arm.smuadx(i32, i32) nounwind +declare i32 @llvm.arm.smusd(i32, i32) nounwind +declare i32 @llvm.arm.smusdx(i32, i32) nounwind Index: test/CodeGen/ARM/sat-arith.ll =================================================================== --- test/CodeGen/ARM/sat-arith.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s -check-prefix=ARM 
-check-prefix=CHECK -; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s -check-prefix=THUMB -check-prefix=CHECK - -; CHECK-LABEL: qadd -define i32 @qadd() nounwind { -; CHECK-DAG: mov{{s?}} [[R0:.*]], #8 -; CHECK-DAG: mov{{s?}} [[R1:.*]], #128 -; CHECK-ARM: qadd [[R0]], [[R1]], [[R0]] -; CHECK-THRUMB: qadd [[R0]], [[R0]], [[R1]] - %tmp = call i32 @llvm.arm.qadd(i32 128, i32 8) - ret i32 %tmp -} - -; CHECK-LABEL: qsub -define i32 @qsub() nounwind { -; CHECK-DAG: mov{{s?}} [[R0:.*]], #8 -; CHECK-DAG: mov{{s?}} [[R1:.*]], #128 -; CHECK-ARM: qsub [[R0]], [[R1]], [[R0]] -; CHECK-THRUMB: qadd [[R0]], [[R1]], [[R0]] - %tmp = call i32 @llvm.arm.qsub(i32 128, i32 8) - ret i32 %tmp -} - -; upper-bound of the immediate argument -; CHECK-LABEL: ssat1 -define i32 @ssat1() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: ssat [[R1:.*]], #32, [[R0]] - %tmp = call i32 @llvm.arm.ssat(i32 128, i32 32) - ret i32 %tmp -} - -; lower-bound of the immediate argument -; CHECK-LABEL: ssat2 -define i32 @ssat2() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: ssat [[R1:.*]], #1, [[R0]] - %tmp = call i32 @llvm.arm.ssat(i32 128, i32 1) - ret i32 %tmp -} - -; upper-bound of the immediate argument -; CHECK-LABEL: usat1 -define i32 @usat1() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: usat [[R1:.*]], #31, [[R0]] - %tmp = call i32 @llvm.arm.usat(i32 128, i32 31) - ret i32 %tmp -} - -; lower-bound of the immediate argument -; CHECK-LABEL: usat2 -define i32 @usat2() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: usat [[R1:.*]], #0, [[R0]] - %tmp = call i32 @llvm.arm.usat(i32 128, i32 0) - ret i32 %tmp -} - -declare i32 @llvm.arm.qadd(i32, i32) nounwind -declare i32 @llvm.arm.qsub(i32, i32) nounwind -declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone -declare i32 @llvm.arm.usat(i32, i32) nounwind readnone