[AArch64][SVE2.1] Add intrinsics for the 2-way dot-product instructions

Summary of what this patch does:
  * IntrinsicsAArch64.td: declares int_aarch64_sve_{s,u,f}dot_x2 and the
    lane-indexed int_aarch64_sve_{s,u,f}dot_lane_x2 intrinsics.
  * AArch64SVEInstrInfo.td: switches the FDOT/SDOT/UDOT (vector and indexed)
    instruction records from `def` to `defm` so each one also carries an
    intrinsic selection pattern.
  * SVEInstrFormats.td: adds the sve2p1_two_way_dot_vv and
    sve2p1_two_way_dot_vvi multiclasses (instruction + pattern).
  * sve2p1-intrinsics-dots.ll: new llc test covering all six intrinsics.

NOTE(review): this copy of the patch was recovered from a version in which
line breaks were collapsed and every `<...>` span starting with a letter or
`!` had been stripped. The following were reconstructed and must be confirmed
against the upstream change before applying:
  * IR operand/result types in the test (derived from the nxv4i32/nxv4f32
    intrinsic suffixes, the .s/.h register suffixes in the CHECK lines, and
    the nxv8f16 argument of the FDOT defm).
  * Multiclass parameter lists (derived from the defm call sites) and the
    SVE_3_Op_Pat / SVE_4_Op_Imm_Pat operand lists; the index operand
    VectorIndexS32b_timm is an assumption consistent with the tested
    index 3 - TODO confirm.
  * The parameter lists on the context lines `class sve2p1_two_way_dot_vvi`
    and `class sve2p1_ptrue_pn` (in particular `bits<2> sz`) - TODO confirm.
  * Blank context lines and indentation, placed so that every hunk matches
    the line counts in its @@ header.

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3219,4 +3219,12 @@
   def int_aarch64_sve_uzpq_x2 : SVE2_VG2_ZipUzp_Intrinsic;
   def int_aarch64_sve_uzp_x4 : SVE2_VG4_ZipUzp_Intrinsic;
   def int_aarch64_sve_uzpq_x4 : SVE2_VG4_ZipUzp_Intrinsic;
+
+  // Vector dot-products (2-way)
+  def int_aarch64_sve_sdot_x2 : SVE2_3VectorArg_Long_Intrinsic;
+  def int_aarch64_sve_udot_x2 : SVE2_3VectorArg_Long_Intrinsic;
+  def int_aarch64_sve_fdot_x2 : SVE2_3VectorArg_Long_Intrinsic;
+  def int_aarch64_sve_sdot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
+  def int_aarch64_sve_udot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
+  def int_aarch64_sve_fdot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3690,17 +3690,17 @@
 let Predicates = [HasSVE2p1_or_HasSME2] in {
 defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
 
-def FDOT_ZZZ_S : sve_float_dot<0b0, "fdot">;
-def FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot">;
+defm FDOT_ZZZ_S : sve_float_dot<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
+defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
 def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
 def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
 def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
 def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
 
-def SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0>;
-def UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1>;
-def SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0>;
-def UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1>;
+defm SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
+defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
+defm SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_lane_x2>;
+defm UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>;
 
 defm CNTP_XCI : sve2p1_pcount_pn<"cntp", 0b000>;
 defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9036,6 +9036,11 @@
 
   let hasSideEffects = 0;
 }
+multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intrinsic> {
+  def NAME : sve2p1_two_way_dot_vv<mnemonic, u>;
+
+  def : SVE_3_Op_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+}
 
 // SVE two-way dot product (indexed)
 class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
@@ -9059,6 +9064,11 @@
 
   let hasSideEffects = 0;
 }
+multiclass sve2p1_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intrinsic> {
+  def NAME : sve2p1_two_way_dot_vvi<mnemonic, u>;
+
+  def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
+}
 
 class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
     : I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd",
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dots.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dots.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dots.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+
+define <vscale x 4 x i32> @sdot_x2(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: sdot_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sdot z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @udot_x2(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: udot_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    udot z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x float> @fdot_x2(<vscale x 4 x float> %zda, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: fdot_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdot z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> %zda, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x i32> @sdot_lane_x2(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: sdot_lane_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sdot z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 3)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @udot_lane_x2(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
+; CHECK-LABEL: udot_lane_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    udot z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 3)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x float> @fdot_lane_x2(<vscale x 4 x float> %zda, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: fdot_lane_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdot z0.s, z1.h, z2.h[3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> %zda, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 3)
+  ret <vscale x 4 x float> %out
+}
+
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> %zda, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> %zda, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32)