diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2267,6 +2267,10 @@
 def int_aarch64_sve_smmla : SVE_MatMul_Intrinsic;
 def int_aarch64_sve_usmmla : SVE_MatMul_Intrinsic;
 
+def int_aarch64_sve_usdot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_usdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
 //
 // SVE ACLE: 7.4/5. FP64/FP32 matrix multiply extensions
 //
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1853,9 +1853,9 @@
   defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
   defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
   defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
-  def USDOT_ZZZ : sve_int_dot_mixed<"usdot">;
-  def USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot">;
-  def SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot">;
+  defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
+  defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
+  defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
 }
 
 let Predicates = [HasSVE, HasMatMulFP32] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7628,12 +7628,18 @@
   let ElementSize = ZPR32.ElementSize;
 }
 
+multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
+  def NAME : sve_int_dot_mixed<asm>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Dot Product Mixed Sign - Indexed Group
 //===----------------------------------------------------------------------===//
 
 class sve_int_dot_mixed_indexed<bit U, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS:$idx),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, VectorIndexS32b:$idx),
   asm, "\t$Zda, $Zn, $Zm$idx", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -7652,6 +7658,12 @@
   let ElementSize = ZPR32.ElementSize;
 }
 
+multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
+  def NAME : sve_int_dot_mixed_indexed<U, asm>;
+
+  def : SVE_4_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Matrix Multiply Accumulate Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-matmul-int8.ll
@@ -27,7 +27,93 @@
   ret <vscale x 4 x i32> %val
 }
 
+define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_0:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[0]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_1:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[1]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_2:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @usdot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: usdot_lane_3:
+; CHECK-NEXT: usdot z0.s, z1.b, z2.b[3]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_0(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_0:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[0]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_1(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_1:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[1]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_2(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_2:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 2)
+  ret <vscale x 4 x i32> %val
+}
+
+define <vscale x 4 x i32> @sudot_lane_3(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
+entry:
+; CHECK-LABEL: sudot_lane_3:
+; CHECK-NEXT: sudot z0.s, z1.b, z2.b[3]
+; CHECK-NEXT: ret
+  %val = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %r, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 3)
+  ret <vscale x 4 x i32> %val
+}
+
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.smmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.ummla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.usmmla.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+
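
For context (not part of the patch): these LLVM intrinsics back the mixed-sign dot-product intrinsics of the SVE ACLE int8 matrix-multiply extension. A minimal C sketch of how they might be exercised from source, assuming an arm_sve.h that provides svusdot_s32 / svusdot_lane_s32 / svsudot_lane_s32 under __ARM_FEATURE_SVE_MATMUL_INT8; the function name dot_mixed is purely illustrative:

  // Illustrative only: ACLE-level usage expected to lower to the
  // llvm.aarch64.sve.usdot / usdot.lane / sudot.lane intrinsics added above.
  // Assumes a toolchain with SVE and the Int8 matrix-multiply extension,
  // e.g. compiled with -march=armv8.6-a+sve+i8mm.
  #include <arm_sve.h>

  svint32_t dot_mixed(svint32_t acc, svuint8_t u, svint8_t s) {
    // usdot: unsigned-by-signed dot product, accumulated into 32-bit lanes.
    acc = svusdot_s32(acc, u, s);
    // usdot (indexed): signed operand taken from 32-bit segment 0 of s.
    acc = svusdot_lane_s32(acc, u, s, 0);
    // sudot (indexed): signed-by-unsigned variant; note the swapped operand order.
    return svsudot_lane_s32(acc, s, u, 3);
  }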