diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2784,6 +2784,35 @@
                              LLVMMatchType<0>, LLVMMatchType<0>],
                             []>;
 
+  class SME2_VG2_Multi_Single_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
+  class SME2_VG4_Multi_Single_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
+  class SME2_VG2_Multi_Multi_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
+  class SME2_VG4_Multi_Multi_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>,
+                             LLVMMatchType<0>, LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
   class SME2_CVT_VG2_SINGLE_Intrinsic
     : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
@@ -2922,6 +2951,14 @@
     }
   }
 
+  // Multi-vector signed saturating doubling multiply high
+
+  def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;
+  def int_aarch64_sve_sqdmulh_single_vgx4 : SME2_VG4_Multi_Single_Intrinsic;
+
+  def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic;
+  def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic;
+
   //
   // Multi-vector vertical dot-products
   //
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -360,6 +360,8 @@
   void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri,
                             bool IsIntr = false);
+  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
+                                       bool IsZmMulti, unsigned Opcode);
   void SelectWhilePair(SDNode *N, unsigned Opc);
   void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
   void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
@@ -1796,6 +1798,40 @@
   return;
 }
 
+void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
+                                                          unsigned NumVecs,
+                                                          bool IsZmMulti,
+                                                          unsigned Opcode) {
+  assert(Opcode != 0 && "Unexpected opcode");
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  auto GetMultiVecOperand = [=](unsigned StartIdx) {
+    SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
+                                 N->op_begin() + StartIdx + NumVecs);
+    return createZMulTuple(Regs);
+  };
+
+  SDValue Zdn = GetMultiVecOperand(1);
+
+  SDValue Zm;
+  if (IsZmMulti)
+    Zm = GetMultiVecOperand(NumVecs + 1);
+  else
+    Zm = N->getOperand(NumVecs + 1);
+
+  SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
+
+  SDValue SuperReg = SDValue(Intrinsic, 0);
+  for (unsigned i = 0; i < NumVecs; ++i)
+    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+                                   AArch64::zsub0 + i, DL, VT, SuperReg));
+
+  CurDAG->RemoveDeadNode(N);
+  return;
+}
+
 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_ri,
                                                unsigned Opc_rr, bool IsIntr) {
@@ -4903,6 +4939,34 @@
       if (tryMULLV64LaneV128(IntNo, Node))
         return;
       break;
+    case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+              Node->getValueType(0),
+              {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
+               AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
+        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+      return;
+    case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+              Node->getValueType(0),
+              {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
+               AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
+        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+      return;
+    case Intrinsic::aarch64_sve_sqdmulh_vgx2:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+              Node->getValueType(0),
+              {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
+               AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
+        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+      return;
+    case Intrinsic::aarch64_sve_sqdmulh_vgx4:
+      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+              Node->getValueType(0),
+              {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
+               AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
+        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+      return;
     case Intrinsic::aarch64_sve_whilege_x2:
       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
@@ -0,0 +1,333 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+; SQDMULH (Single, x2)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.b, z5.b }, { z4.b, z5.b }, z3.b
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.h, z5.h }, { z4.h, z5.h }, z3.h
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.s, z5.s }, { z4.s, z5.s }, z3.s
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.d, z5.d }, { z4.d, z5.d }, z3.d
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; SQDMULH (Single, x4)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+@multi_vec_sat_double_mulh_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, z5.b
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+@multi_vec_sat_double_mulh_single_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, z5.h
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+@multi_vec_sat_double_mulh_single_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, z5.s
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+@multi_vec_sat_double_mulh_single_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, z5.d
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+              @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; SQDMULH (Multi, x2)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; SQDMULH (Multi, x4)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+@multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
+                                       <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+              @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
+                                                     <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+@multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
+                                        <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+              @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
+                                                     <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+@multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
+                                        <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+              @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
+                                                     <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+@multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
+                                        <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
+; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z27.d, z4.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+              @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
+                                                     <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+  @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+  @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+                                         <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+  @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+                                         <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+  @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>,
+                                         <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+  @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>,
+                                         <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)