[AArch64][SME2] Add intrinsics for the multi-vector FCVTN/BFCVTN down-converts

Summary of what this patch does, as visible in the hunks below:
  * IntrinsicsAArch64.td: adds two IntrNoMem intrinsic classes and the
    intrinsics int_aarch64_sve_fcvtn_x2 / int_aarch64_sve_bfcvtn_x2. Both take
    two input vectors and return one vector (the FCVTN form is overloaded on
    the input type and returns LLVMSubdivide2VectorType<0>; the BFCVTN form is
    fixed to nxv4f32 inputs and an nxv8bf16 result).
  * SMEInstrFormats.td: adds SME2_Cvt_VG2_Pat, which packs the two inputs into
    a ZPR2Mul2 REG_SEQUENCE (zsub0, zsub1) and selects the named instruction;
    the sme2_cvt_vg2_single multiclass gains out_vt / in_vt / intrinsic
    parameters and instantiates that pattern.
  * AArch64SMEInstrInfo.td: every sme2_cvt_vg2_single instantiation passes the
    new arguments; only FCVTN and BFCVTN name an intrinsic, the rest pass
    null_frag.
  * New llc test covering both intrinsics.

NOTE(review): this copy of the patch had lost every angle-bracket span that
looked like a markup tag. The template parameter/argument lists, the !cast
type argument and the <vscale x N x T> IR types below were restored from how
the names are used elsewhere in the patch - please confirm against the tree.
NOTE(review): the "[ImmArg>]>;" context line in the first hunk belongs to a
definition that starts outside this patch; its ArgIndex value could not be
recovered and the line is left exactly as found.
NOTE(review): original indentation/column alignment was not recoverable;
context-line whitespace is best-effort and may need -l / --ignore-whitespace.

diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2752,6 +2752,17 @@
                  LLVMMatchType<0>, llvm_i32_ty],
                 [ImmArg>]>;
 
+  class SME2_CVT_VG2_SINGLE_Intrinsic
+    : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
+                            [llvm_anyvector_ty, LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
+  class SME2_CVT_VG2_SINGLE_BF16_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv4f32_ty],
+                            [IntrNoMem]>;
+
+
   //
   // Multi-vector fused multiply-add/subtract
   //
@@ -2804,4 +2815,10 @@
 
   def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
   def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+
+  //
+  // Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16
+  //
+  def int_aarch64_sve_fcvtn_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
+  def int_aarch64_sve_bfcvtn_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -363,14 +363,14 @@
 defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>;
 defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>;
 
-defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000>;
-defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001>;
-defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000>;
-defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001>;
-
-defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110>;
-defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111>;
-defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110>;
+defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, null_frag>;
+defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
+defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, null_frag>;
+defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
+
+defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, null_frag>;
+defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, null_frag>;
+defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, null_frag>;
 defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000>;
 defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001>;
 defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -133,6 +133,10 @@
                                               (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
                                               zpr_ty:$Zm, imm_ty:$i)>;
 
+class SME2_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
+    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
+          (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
+
 //===----------------------------------------------------------------------===//
 // SME Outer Products
 //===----------------------------------------------------------------------===//
@@ -2063,8 +2067,10 @@
 
 // SME2 multi-vec FP down convert two registers
 // SME2 multi-vec int down convert two registers
-multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op> {
+multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
+                               ValueType in_vt, SDPatternOperator intrinsic> {
   def NAME : sme2_cvt_vg2_single<mnemonic, op>;
+  def : SME2_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
 }
 
 class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; FCVTN
+;
+define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vector_cvtn_x2_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    fcvtn z0.h, { z0.s, z1.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret <vscale x 8 x half> %res
+}
+
+;
+; BFCVTN
+;
+
+define <vscale x 8 x bfloat> @multi_vector_bfcvtn_x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vector_bfcvtn_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    bfcvtn z0.h, { z0.s, z1.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float>, <vscale x 4 x float>)