diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3044,6 +3044,13 @@
   def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic;
   def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic;
 
+  // Multi-vector floating-point round to integral value
+
+  foreach inst = ["a", "m", "n", "p"] in {
+    def int_aarch64_sve_frint # inst # _x2 : SVE2_VG2_ZipUzp_Intrinsic;
+    def int_aarch64_sve_frint # inst # _x4 : SVE2_VG4_ZipUzp_Intrinsic;
+  }
+
   //
   // Multi-vector min/max
   //
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -367,6 +367,7 @@
   void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
   void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                  bool IsTupleInput, unsigned Opc);
+  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
 
   template <unsigned MaxIdx, unsigned Scale>
   void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
@@ -1874,6 +1875,13 @@
   CurDAG->RemoveDeadNode(N);
 }
 
+void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
+                                            unsigned Opcode) {
+  if (N->getValueType(0) != MVT::nxv4f32)
+    return;
+  SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
+}
+
 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
                                       unsigned Op) {
   SDLoc DL(N);
@@ -5380,6 +5388,30 @@
       SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                 AArch64::UZP_VG4_4Z4Z_Q);
       return;
+    case Intrinsic::aarch64_sve_frinta_x2:
+      SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frinta_x4:
+      SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frintm_x2:
+      SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frintm_x4:
+      SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frintn_x2:
+      SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frintn_x4:
+      SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frintp_x2:
+      SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
+      return;
+    case Intrinsic::aarch64_sve_frintp_x4:
+      SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
+      return;
     }
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+; FRINTA
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frinta_x2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    frinta { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frinta_x4_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    frinta { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTM
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintm_x2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    frintm { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintm_x4_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    frintm { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTN
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintn_x2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    frintn { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintn_x4_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    frintn { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+; FRINTP
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vec_frintp_x2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    frintp { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
+; CHECK-LABEL: multi_vec_frintp_x4_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    frintp { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
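
Note: the new intrinsics return a struct of scalable vectors, so IR callers unpack the
results with extractvalue. Below is a minimal, hypothetical usage sketch (not part of the
patch; the function name is invented) using the FRINTA x2 variant, which rounds each lane
to the nearest integral value with ties away from zero. Per SelectFrintFromVT above, only
the <vscale x 4 x float> (.S) form is selected.

; Hypothetical sketch, assuming -mattr=+sme2: round both inputs to integral
; values (ties away from zero), then add the two rounded vectors.
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

define <vscale x 4 x float> @sum_of_rounded(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; Single frinta on the { %a, %b } pair, returned as a two-element struct.
  %pair = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  %r0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %pair, 0
  %r1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %pair, 1
  %sum = fadd <vscale x 4 x float> %r0, %r1
  ret <vscale x 4 x float> %sum
}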