diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -787,4 +787,34 @@ [], [IntrReadMem, IntrWriteMem]>; +def int_arm_mve_pred_i2v : Intrinsic< + [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_mve_pred_v2i : Intrinsic< + [llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem]>; + +multiclass IntrinsicSignSuffix rets, list params = [], + list props = [], + string name = "", + list sdprops = []> { + def _s: Intrinsic; + def _u: Intrinsic; +} + +def int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; + +defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty], + [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; +defm int_arm_mve_maxv: IntrinsicSignSuffix<[llvm_i32_ty], + [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; + +def int_arm_mve_vcvt: Intrinsic<[llvm_v8f16_ty], + [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_mve_vcvt_predicated: Intrinsic<[llvm_v8f16_ty], + [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem]>; + } // end TargetPrefix diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -3679,6 +3679,10 @@ case Intrinsic::arm_neon_vtbl2: return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::arm_mve_pred_i2v: + case Intrinsic::arm_mve_pred_v2i: + return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); } } diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -275,6 +275,28 @@ let MIOperandInfo = (ops MQPR:$base, i32imm:$imm); } +// This class is effectively just a sort of 'subroutine', working around the +// fact that Tablegen has no explicit syntax for function definition. If you +// have a vector type like v8i16, and you want the corresponding predicate type +// that should be used in IR intrinsics whose source vector is of that type, +// you can refer to mkpred.p to compute it automatically, e.g. in +// foreach or multiclass definitions. +class mkpred { + ValueType p = !cond(!eq(VT.Value, v16i8.Value): v16i1, + !eq(VT.Value, v8i16.Value): v8i1, + !eq(VT.Value, v8f16.Value): v8i1, + !eq(VT.Value, v4i32.Value): v4i1, + !eq(VT.Value, v4f32.Value): v4i1, + // For vectors of 2 values, use v4i1 instead of v2i1 for + // the moment: MVE codegen doesn't support doing all the + // auxiliary operations on v2i1 such as vector shuffles, + // and also, there's no MVE compare instruction that will + // generate v2i1 directly. We could rethink this later if + // we have a better idea. + !eq(VT.Value, v2i64.Value): v4i1, + !eq(VT.Value, v2f64.Value): v4i1); +} + // --------- Start of base classes for the instructions themselves class MVE_MI pattern=[]> { - def s8 : MVE_VMINMAXV; - def s16 : MVE_VMINMAXV; - def s32 : MVE_VMINMAXV; - def u8 : MVE_VMINMAXV; - def u16 : MVE_VMINMAXV; - def u32 : MVE_VMINMAXV; +multiclass MVE_VMINMAXV_p size, bit bit_17, bit bit_7, + ValueType vtype, Intrinsic intr> { + def "": MVE_VMINMAXV; + + let Predicates = [HasMVEInt] in + def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (vtype MQPR:$vec))), + (i32 (!cast(NAME) + (i32 rGPR:$prev), (vtype MQPR:$vec)))>; +} + +multiclass MVE_VMINMAXV_ty { + defm s8 : MVE_VMINMAXV_p; + defm s16: MVE_VMINMAXV_p; + defm s32: MVE_VMINMAXV_p; + defm u8 : MVE_VMINMAXV_p; + defm u16: MVE_VMINMAXV_p; + defm u32: MVE_VMINMAXV_p; } -defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>; -defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>; +defm MVE_VMINV : MVE_VMINMAXV_ty< + "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>; +defm MVE_VMAXV : MVE_VMINMAXV_ty< + "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>; let Predicates = [HasMVEInt] in { def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))), @@ -1481,7 +1517,7 @@ def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>; class MVE_VADDSUB size, bit subtract, - list pattern=[]> + ValueType VT_, list pattern=[]> : MVE_int { let Inst{28} = subtract; @@ -1490,37 +1526,51 @@ let Inst{12-8} = 0b01000; let Inst{4} = 0b0; let Inst{0} = 0b0; -} -class MVE_VADD size, list pattern=[]> - : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>; -class MVE_VSUB size, list pattern=[]> - : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>; + ValueType VT = VT_; +} -def MVE_VADDi8 : MVE_VADD<"i8", 0b00>; -def MVE_VADDi16 : MVE_VADD<"i16", 0b01>; -def MVE_VADDi32 : MVE_VADD<"i32", 0b10>; +class MVE_VADD size, ValueType VT> + : MVE_VADDSUB<"vadd", suffix, size, 0b0, VT>; +class MVE_VSUB size, ValueType VT> + : MVE_VADDSUB<"vsub", suffix, size, 0b1, VT>; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; -} +def MVE_VADDi8 : MVE_VADD<"i8", 0b00, v16i8>; +def MVE_VADDi16 : MVE_VADD<"i16", 0b01, v8i16>; +def MVE_VADDi32 : MVE_VADD<"i32", 0b10, v4i32>; -def MVE_VSUBi8 : MVE_VSUB<"i8", 0b00>; -def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>; -def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>; +def MVE_VSUBi8 : MVE_VSUB<"i8", 0b00, v16i8>; +def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01, v8i16>; +def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10, v4i32>; let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + foreach instr = [MVE_VADDi8, MVE_VADDi16, MVE_VADDi32] in + foreach vtype = [instr.VT] in + foreach ptype = [mkpred.p] in { + def : Pat<(vtype (add (vtype MQPR:$Qm), (vtype MQPR:$Qn))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn)))>; + def : Pat<(vtype (int_arm_mve_add_predicated (vtype MQPR:$Qm), + (vtype MQPR:$Qn), + (ptype VCCR:$mask), + (vtype MQPR:$inactive))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn), + (i32 1), (ptype VCCR:$mask), + (vtype MQPR:$inactive)))>; + } + + foreach instr = [MVE_VSUBi8, MVE_VSUBi16, MVE_VSUBi32] in + foreach vtype = [instr.VT] in + foreach ptype = [mkpred.p] in { + def : Pat<(vtype (sub (vtype MQPR:$Qm), (vtype MQPR:$Qn))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn)))>; + def : Pat<(vtype (int_arm_mve_sub_predicated (vtype MQPR:$Qm), + (vtype MQPR:$Qn), + (ptype VCCR:$mask), + (vtype MQPR:$inactive))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn), + (i32 1), (ptype VCCR:$mask), + (vtype MQPR:$inactive)))>; + } } class MVE_VQADDSUB; class MVE_VADDSUBFMA_fp pattern=[]> : MVEFloatArithNeon; -def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0, +def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0, v8f16, (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">; -def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1, +def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1, v4f32, (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">; -def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1, +def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1, v8f16, (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">; let Predicates = [HasMVEFloat, UseFusedMAC] in { @@ -2729,24 +2781,40 @@ } -def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; -def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; - -let Predicates = [HasMVEFloat] in { - def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), - (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; - def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), - (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; -} +def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0, v4f32>; +def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0, v8f16>; -def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; -def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; +def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1, v4f32>; +def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1, v8f16>; let Predicates = [HasMVEFloat] in { - def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), - (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; - def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), - (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + foreach instr = [MVE_VADDf16, MVE_VADDf32] in + foreach vtype = [instr.VT] in + foreach ptype = [mkpred.p] in { + def : Pat<(vtype (fadd (vtype MQPR:$Qm), (vtype MQPR:$Qn))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn)))>; + def : Pat<(vtype (int_arm_mve_add_predicated (vtype MQPR:$Qm), + (vtype MQPR:$Qn), + (ptype VCCR:$mask), + (vtype MQPR:$inactive))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn), + (i32 1), (ptype VCCR:$mask), + (vtype MQPR:$inactive)))>; + } + + foreach instr = [MVE_VSUBf16, MVE_VSUBf32] in + foreach vtype = [instr.VT] in + foreach ptype = [mkpred.p] in { + def : Pat<(vtype (fsub (vtype MQPR:$Qm), (vtype MQPR:$Qn))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn)))>; + def : Pat<(vtype (int_arm_mve_sub_predicated (vtype MQPR:$Qm), + (vtype MQPR:$Qn), + (ptype VCCR:$mask), + (vtype MQPR:$inactive))), + (vtype (instr (vtype MQPR:$Qm), (vtype MQPR:$Qn), + (i32 1), (ptype VCCR:$mask), + (vtype MQPR:$inactive)))>; + } } class MVE_VCADD pattern=[]> @@ -3500,6 +3568,17 @@ defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>; defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>; +let Predicates = [HasMVEFloat] in { + def : Pat<(v8f16 (int_arm_mve_vcvt (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 0))), + (v8f16 (MVE_VCVTf16f32bh (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>; + def : Pat<(v8f16 (int_arm_mve_vcvt (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 1))), + (v8f16 (MVE_VCVTf16f32th (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>; + def : Pat<(v8f16 (int_arm_mve_vcvt_predicated (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 0), (v4i1 VCCR:$mask))), + (v8f16 (MVE_VCVTf16f32bh (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 1), (v4i1 VCCR:$mask)))>; + def : Pat<(v8f16 (int_arm_mve_vcvt_predicated (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 1), (v4i1 VCCR:$mask))), + (v8f16 (MVE_VCVTf16f32th (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 1), (v4i1 VCCR:$mask)))>; +} + class MVE_VxCADD size, bit halve, string cstr="", list pattern=[]> : MVE_qDest_qSrc @test_vaddq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vaddq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vadd.i32 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = add <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: test_vsubq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vsub.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = fsub <8 x half> %a, %b + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vaddq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vaddq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vaddt.i8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <4 x float> @test_vsubq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vsubq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vsubt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vcvttq_f16_f32(<8 x half> %a, <4 x float> %b) { +; CHECK-LABEL: test_vcvttq_f16_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvtt.f16.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vcvt(<8 x half> %a, <4 x float> %b, i32 1) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_f16_f32(<8 x half> %a, <4 x float> %b) { +; CHECK-LABEL: test_vcvtbq_f16_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vcvtb.f16.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.arm.mve.vcvt(<8 x half> %a, <4 x float> %b, i32 0) + ret <8 x half> %0 +} + +declare <8 x half> @llvm.arm.mve.vcvt(<8 x half>, <4 x float>, i32) + +define arm_aapcs_vfpcc <8 x half> @test_vcvttq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vcvttq_m_f16_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvttt.f16.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vcvt.predicated(<8 x half> %a, <4 x float> %b, i32 1, <4 x i1> %1) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vcvtbq_m_f16_f32(<8 x half> %a, <4 x float> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtbq_m_f16_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtbt.f16.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vcvt.predicated(<8 x half> %a, <4 x float> %b, i32 0, <4 x i1> %1) + ret <8 x half> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <8 x half> @llvm.arm.mve.vcvt.predicated(<8 x half>, <4 x float>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc i32 @test_vminvq_u32(i32 %a, <4 x i32> %b) { +; CHECK-LABEL: test_vminvq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminv.u32 r0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.arm.mve.minv.u.v4i32(i32 %a, <4 x i32> %b) + ret i32 %0 +} + +define arm_aapcs_vfpcc i32 @test_vmaxvq_u8(i32 %a, <16 x i8> %b) { +; CHECK-LABEL: test_vmaxvq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxv.u8 r0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.arm.mve.maxv.u.v16i8(i32 %a, <16 x i8> %b) + ret i32 %0 +} + +define arm_aapcs_vfpcc i32 @test_vminvq_s16(i32 %a, <8 x i16> %b) { +; CHECK-LABEL: test_vminvq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminv.s16 r0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = tail call i32 @llvm.arm.mve.minv.s.v8i16(i32 %a, <8 x i16> %b) + ret i32 %0 +} + +declare i32 @llvm.arm.mve.minv.u.v4i32(i32, <4 x i32>) +declare i32 @llvm.arm.mve.maxv.u.v16i8(i32, <16 x i8>) +declare i32 @llvm.arm.mve.minv.s.v8i16(i32, <8 x i16>)