diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1100,7 +1100,7 @@
                 [LLVMMatchType<0>,
                  LLVMSubdivide2VectorType<0>,
                  LLVMSubdivide2VectorType<0>,
-                 llvm_i64_ty],
+                 llvm_i32_ty],
                 [IntrNoMem, ImmArg<3>]>;
 // NOTE: There is no relationship between these intrinsics beyond an attempt
@@ -1791,13 +1791,17 @@
 def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
 def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
-def int_aarch64_sve_smlalb : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_smlalt : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_umlalb : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_umlalt : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_smlslb : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_smlslt : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_umlslb : SVE2_3VectorArg_Indexed_Intrinsic;
-def int_aarch64_sve_umlslt : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_smlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_smlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_umlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_umlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_smlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_smlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_umlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_umlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_sqdmlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_sqdmlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_sqdmlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
+def int_aarch64_sve_sqdmlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1467,14 +1467,14 @@
   defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">;
   // SVE2 integer multiply-add long (indexed)
-  defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb>;
-  defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt", int_aarch64_sve_smlalt>;
-  defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb", int_aarch64_sve_umlalb>;
-  defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt", int_aarch64_sve_umlalt>;
-  defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb", int_aarch64_sve_smlslb>;
-  defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt", int_aarch64_sve_smlslt>;
-  defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb", int_aarch64_sve_umlslb>;
-  defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt", int_aarch64_sve_umlslt>;
+  defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb", int_aarch64_sve_smlalb_lane>;
+  defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt", int_aarch64_sve_smlalt_lane>;
+  defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb", int_aarch64_sve_umlalb_lane>;
+  defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt", int_aarch64_sve_umlalt_lane>;
+  defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb", int_aarch64_sve_smlslb_lane>;
+  defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt", int_aarch64_sve_smlslt_lane>;
+  defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb", int_aarch64_sve_umlslb_lane>;
+  defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt", int_aarch64_sve_umlslt_lane>;
   // SVE2 integer multiply-add long (vectors, unpredicated)
   defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb">;
@@ -1487,10 +1487,10 @@
   defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt">;
   // SVE2 saturating multiply-add long (indexed)
-  defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", null_frag>;
-  defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt", null_frag>;
-  defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb", null_frag>;
-  defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt", null_frag>;
+  defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb", int_aarch64_sve_sqdmlalb_lane>;
+  defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt", int_aarch64_sve_sqdmlalt_lane>;
+  defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb", int_aarch64_sve_sqdmlslb_lane>;
+  defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt", int_aarch64_sve_sqdmlslt_lane>;
   // SVE2 saturating multiply-add long (vectors, unpredicated)
   defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2414,7 +2414,7 @@
 multiclass sve2_int_mla_long_by_indexed_elem opc, string asm,
                                              SDPatternOperator op> {
   def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
-                                        asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+                                        asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH32b> {
     bits<3> Zm;
     bits<3> iop;
     let Inst{20-19} = iop{2-1};
@@ -2422,7 +2422,7 @@
     let Inst{11} = iop{0};
   }
   def _D : sve2_int_mla_by_indexed_elem<0b11, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
-                                        asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+                                        asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS32b> {
     bits<4> Zm;
     bits<2> iop;
     let Inst{20} = iop{1};
@@ -2430,8 +2430,8 @@
     let Inst{11} = iop{0};
   }
-  def : SVE_4_Op_Imm_Pat(NAME # _S)>;
-  def : SVE_4_Op_Imm_Pat(NAME # _D)>;
+  def : SVE_4_Op_Imm_Pat(NAME # _S)>;
+  def : SVE_4_Op_Imm_Pat(NAME # _D)>;
 }
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve2-mla-indexed.ll b/llvm/test/CodeGen/AArch64/sve2-mla-indexed.ll
--- a/llvm/test/CodeGen/AArch64/sve2-mla-indexed.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-mla-indexed.ll
@@ -9,10 +9,10 @@
 ; CHECK-LABEL: smlalb_i32
 ; CHECK: smlalb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.smlalb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -22,10 +22,10 @@
 ; CHECK-LABEL: smlalb_i32_2
 ; CHECK: smlalb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.smlalb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -35,10 +35,10 @@
 ; CHECK-LABEL: smlalb_i64
 ; CHECK: smlalb z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.smlalb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -48,10 +48,10 @@
 ; CHECK-LABEL: smlalb_i64_2
 ; CHECK: smlalb z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.smlalb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -64,10 +64,10 @@
 ; CHECK-LABEL: smlalt_i32
 ; CHECK: smlalt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.smlalt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -77,10 +77,10 @@
 ; CHECK-LABEL: smlalt_i32_2
 ; CHECK: smlalt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.smlalt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -90,10 +90,10 @@
 ; CHECK-LABEL: smlalt_i64
 ; CHECK: smlalt z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.smlalt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -103,10 +103,10 @@
 ; CHECK-LABEL: smlalt_i64_2
 ; CHECK: smlalt z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlalt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.smlalt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -119,10 +119,10 @@
 ; CHECK-LABEL: umlalb_i32
 ; CHECK: umlalb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.umlalb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -132,10 +132,10 @@
 ; CHECK-LABEL: umlalb_i32_2
 ; CHECK: umlalb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.umlalb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -145,10 +145,10 @@
 ; CHECK-LABEL: umlalb_i64
 ; CHECK: umlalb z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.umlalb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -158,10 +158,10 @@
 ; CHECK-LABEL: umlalb_i64_2
 ; CHECK: umlalb z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.umlalb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -174,10 +174,10 @@
 ; CHECK-LABEL: umlalt_i32
 ; CHECK: umlalt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.umlalt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -187,10 +187,10 @@
 ; CHECK-LABEL: umlalt_i32_2
 ; CHECK: umlalt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.umlalt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -200,10 +200,10 @@
 ; CHECK-LABEL: umlalt_i64
 ; CHECK: umlalt z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.umlalt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -213,10 +213,10 @@
 ; CHECK-LABEL: umlalt_i64_2
 ; CHECK: umlalt z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlalt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.umlalt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -229,10 +229,10 @@
 ; CHECK-LABEL: smlslb_i32
 ; CHECK: smlslb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.smlslb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -242,10 +242,10 @@
 ; CHECK-LABEL: smlslb_i32_2
 ; CHECK: smlslb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.smlslb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -255,10 +255,10 @@
 ; CHECK-LABEL: smlslb_i64
 ; CHECK: smlslb z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.smlslb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -268,10 +268,10 @@
 ; CHECK-LABEL: smlslb_i64_2
 ; CHECK: smlslb z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.smlslb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -284,10 +284,10 @@
 ; CHECK-LABEL: smlslt_i32
 ; CHECK: smlslt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.smlslt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -297,10 +297,10 @@
 ; CHECK-LABEL: smlslt_i32_2
 ; CHECK: smlslt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.smlslt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -310,10 +310,10 @@
 ; CHECK-LABEL: smlslt_i64
 ; CHECK: smlslt z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.smlslt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -323,10 +323,10 @@
 ; CHECK-LABEL: smlslt_i64_2
 ; CHECK: smlslt z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.smlslt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.smlslt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -339,10 +339,10 @@
 ; CHECK-LABEL: umlslb_i32
 ; CHECK: umlslb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.umlslb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -352,10 +352,10 @@
 ; CHECK-LABEL: umlslb_i32_2
 ; CHECK: umlslb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslb.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.umlslb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -365,10 +365,10 @@
 ; CHECK-LABEL: umlslb_i64
 ; CHECK: umlslb z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.umlslb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -378,10 +378,10 @@
 ; CHECK-LABEL: umlslb_i64_2
 ; CHECK: umlslb z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslb.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.umlslb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
@@ -394,10 +394,10 @@
 ; CHECK-LABEL: umlslt_i32
 ; CHECK: umlslt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 1)
+  %res = call @llvm.aarch64.sve.umlslt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
   ret %res
 }
@@ -407,10 +407,10 @@
 ; CHECK-LABEL: umlslt_i32_2
 ; CHECK: umlslt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslt.nxv4i32( %a,
-              %b,
-              %c,
-              i64 7)
+  %res = call @llvm.aarch64.sve.umlslt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
   ret %res
 }
@@ -420,10 +420,10 @@
 ; CHECK-LABEL: umlslt_i64
 ; CHECK: umlslt z0.d, z1.s, z2.s[0]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 0)
+  %res = call @llvm.aarch64.sve.umlslt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
   ret %res
 }
@@ -433,26 +433,254 @@
 ; CHECK-LABEL: umlslt_i64_2
 ; CHECK: umlslt z0.d, z1.s, z2.s[3]
 ; CHECK-NEXT: ret
-  %res = call @llvm.aarch64.sve.umlslt.nxv2i64( %a,
-              %b,
-              %c,
-              i64 3)
+  %res = call @llvm.aarch64.sve.umlslt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
   ret %res
 }
-declare @llvm.aarch64.sve.smlalb.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.smlalb.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.smlalt.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.smlalt.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.umlalb.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.umlalb.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.umlalt.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.umlalt.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.smlslb.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.smlslb.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.smlslt.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.smlslt.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.umlslb.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.umlslb.nxv2i64(, , , i64)
-declare @llvm.aarch64.sve.umlslt.nxv4i32(, , , i64)
-declare @llvm.aarch64.sve.umlslt.nxv2i64(, , , i64)
+;
+; SQDMLALB
+;
+define @sqdmlalb_i32( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalb_i32
+; CHECK: sqdmlalb z0.s, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
+  ret %res
+}
+
+define @sqdmlalb_i32_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalb_i32_2
+; CHECK: sqdmlalb z0.s, z1.h, z2.h[7]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
+  ret %res
+}
+
+define @sqdmlalb_i64( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalb_i64
+; CHECK: sqdmlalb z0.d, z1.s, z2.s[0]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
+  ret %res
+}
+
+define @sqdmlalb_i64_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalb_i64_2
+; CHECK: sqdmlalb z0.d, z1.s, z2.s[3]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
+  ret %res
+}
+
+;
+; SQDMLALT
+;
+define @sqdmlalt_i32( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalt_i32
+; CHECK: sqdmlalt z0.s, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
+  ret %res
+}
+
+define @sqdmlalt_i32_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalt_i32_2
+; CHECK: sqdmlalt z0.s, z1.h, z2.h[7]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
+  ret %res
+}
+
+define @sqdmlalt_i64( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalt_i64
+; CHECK: sqdmlalt z0.d, z1.s, z2.s[0]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
+  ret %res
+}
+
+define @sqdmlalt_i64_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlalt_i64_2
+; CHECK: sqdmlalt z0.d, z1.s, z2.s[3]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlalt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
+  ret %res
+}
+
+;
+; SQDMLSLB
+;
+define @sqdmlslb_i32( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslb_i32
+; CHECK: sqdmlslb z0.s, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
+  ret %res
+}
+
+define @sqdmlslb_i32_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslb_i32_2
+; CHECK: sqdmlslb z0.s, z1.h, z2.h[7]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslb.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
+  ret %res
+}
+
+define @sqdmlslb_i64( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslb_i64
+; CHECK: sqdmlslb z0.d, z1.s, z2.s[0]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
+  ret %res
+}
+
+define @sqdmlslb_i64_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslb_i64_2
+; CHECK: sqdmlslb z0.d, z1.s, z2.s[3]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslb.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
+  ret %res
+}
+
+;
+; SQDMLSLT
+;
+define @sqdmlslt_i32( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslt_i32
+; CHECK: sqdmlslt z0.s, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 1)
+  ret %res
+}
+
+define @sqdmlslt_i32_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslt_i32_2
+; CHECK: sqdmlslt z0.s, z1.h, z2.h[7]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslt.lane.nxv4i32( %a,
+              %b,
+              %c,
+              i32 7)
+  ret %res
+}
+
+define @sqdmlslt_i64( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslt_i64
+; CHECK: sqdmlslt z0.d, z1.s, z2.s[0]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 0)
+  ret %res
+}
+
+define @sqdmlslt_i64_2( %a,
+              %b,
+              %c) {
+; CHECK-LABEL: sqdmlslt_i64_2
+; CHECK: sqdmlslt z0.d, z1.s, z2.s[3]
+; CHECK-NEXT: ret
+  %res = call @llvm.aarch64.sve.sqdmlslt.lane.nxv2i64( %a,
+              %b,
+              %c,
+              i32 3)
+  ret %res
+}
+
+declare @llvm.aarch64.sve.smlalb.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.smlalb.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.smlalt.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.smlalt.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.umlalb.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.umlalb.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.umlalt.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.umlalt.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.smlslb.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.smlslb.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.smlslt.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.smlslt.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.umlslb.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.umlslb.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.umlslt.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.umlslt.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.sqdmlalb.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.sqdmlalb.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.sqdmlalt.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.sqdmlalt.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.sqdmlslb.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.sqdmlslb.lane.nxv2i64(, , , i32)
+declare @llvm.aarch64.sve.sqdmlslt.lane.nxv4i32(, , , i32)
+declare @llvm.aarch64.sve.sqdmlslt.lane.nxv2i64(, , , i32)
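Note: for reference, a minimal IR usage sketch of one of the renamed intrinsics. The wrapper function name and the chosen lane index are illustrative only; the intrinsic name, operand order, and the i32 immediate lane operand follow the declarations in the test above. The lane immediate must stay in range for the element size (0-7 for .h lanes, 0-3 for .s lanes).

define <vscale x 4 x i32> @smlalb_lane_example(<vscale x 4 x i32> %acc,
                                               <vscale x 8 x i16> %op1,
                                               <vscale x 8 x i16> %op2) {
  ; Widening multiply of the even (bottom) i16 elements of %op1 by lane 2 of
  ; %op2, accumulated into the i32 vector %acc.
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.smlalb.lane.nxv4i32(
              <vscale x 4 x i32> %acc,
              <vscale x 8 x i16> %op1,
              <vscale x 8 x i16> %op2,
              i32 2)
  ret <vscale x 4 x i32> %r
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.smlalb.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)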