Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3035,6 +3035,18 @@ } } + // + // Multi-vector floating point min/max number + // + + foreach instr = ["fmaxnm", "fminnm"] in { + def int_aarch64_sve_ # instr # _single_x2 : SME2_VG2_Multi_Single_Intrinsic; + def int_aarch64_sve_ # instr # _single_x4 : SME2_VG4_Multi_Single_Intrinsic; + + def int_aarch64_sve_ # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic; + def int_aarch64_sve_ # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic; + } + // // Multi-vector vertical dot-products // Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5163,6 +5163,62 @@ AArch64::FMIN_VG4_4Z4Z_D})) SelectDestructiveMultiIntrinsic(Node, 4, true, Op); return; + case Intrinsic::aarch64_sve_fmaxnm_single_x2 : + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S, + AArch64::FMAXNM_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_fmaxnm_single_x4 : + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S, + AArch64::FMAXNM_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_fminnm_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S, + AArch64::FMINNM_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_fminnm_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S, + AArch64::FMINNM_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_fmaxnm_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S, + AArch64::FMAXNM_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_fmaxnm_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S, + AArch64::FMAXNM_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_fminnm_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S, + AArch64::FMINNM_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_fminnm_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S, + AArch64::FMINNM_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; case Intrinsic::aarch64_sve_fcvts_x2: SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); return; Index: llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll =================================================================== --- llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll +++ llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll @@ -673,6 +673,230 @@ ret { , , , } %res } +; FMAXNM (Single, x2) + +define { , } @multi_vec_maxnm_single_x2_f16( %dummy, %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_maxnm_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_maxnm_single_x2_f32( %dummy, %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_maxnm_single_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_maxnm_single_x2_f64( %dummy, %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_maxnm_single_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; FMAXNM (Single, x4) + +define { , , , } +@multi_vec_maxnm_single_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_maxnm_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_maxnm_single_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_maxnm_single_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_maxnm_single_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_maxnm_single_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; FMAXNM (Multi, x2) + +define { , } @multi_vec_maxnm_x2_f16( %dummy, %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_maxnm_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_maxnm_x2_f32( %dummy, %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_maxnm_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_maxnm_x2_f64( %dummy, %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_maxnm_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +; FMAXNM (Multi, x4) + +define { , , , } +@multi_vec_maxnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_maxnm_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_maxnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_maxnm_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_maxnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_maxnm_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + declare { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32(, , ) @@ -742,3 +966,25 @@ @llvm.aarch64.sve.fmax.x4.nxv4f32(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64(, , , , , , , ) + +declare { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(, , ) +declare { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(, , ) +declare { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(, , ) + +declare { , , , } + @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16(, , , , ) +declare { , , , } + @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32(, , , , ) +declare { , , , } + @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64(, , , , ) + +declare { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16(, , , ) +declare { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(, , , ) +declare { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(, , , ) + +declare { , , , } + @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.fmaxnm.x4.nxv4f32(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.fmaxnm.x4.nxv2f64(, , , , , , , ) Index: llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll =================================================================== --- llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll +++ llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+b16 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s ; SMIN (Single, x2) @@ -673,6 +673,230 @@ ret { , , , } %res } +; FMINNM (Single, x2) + +define { , } @multi_vec_minnm_single_x2_f16( %dummy, %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_minnm_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, z3.h +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_minnm_single_x2_f32( %dummy, %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_minnm_single_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, z3.s +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_minnm_single_x2_f64( %dummy, %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_minnm_single_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, z3.d +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; FMINNM (Single, x4) + +define { , , , } +@multi_vec_minnm_single_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_minnm_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_minnm_single_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_minnm_single_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_minnm_single_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_minnm_single_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; FMINNM (Multi, x2) + +define { , } @multi_vec_minnm_x2_f16( %dummy, %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_minnm_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_minnm_x2_f32( %dummy, %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_minnm_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_minnm_x2_f64( %dummy, %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_minnm_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +; FMINNM (Multi, x4) + +define { , , , } +@multi_vec_minnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_minnm_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fminnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_minnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_minnm_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fminnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_minnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_minnm_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z27.d, z4.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fminnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + declare { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32(, , ) @@ -742,3 +966,25 @@ @llvm.aarch64.sve.fmin.x4.nxv4f32(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64(, , , , , , , ) + +declare { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(, , ) +declare { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(, , ) +declare { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(, , ) + +declare { , , , } + @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(, , , , ) +declare { , , , } + @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(, , , , ) +declare { , , , } + @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(, , , , ) + +declare { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16(, , , ) +declare { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32(, , , ) +declare { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64(, , , ) + +declare { , , , } + @llvm.aarch64.sve.fminnm.x4.nxv8f16(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.fminnm.x4.nxv4f32(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.fminnm.x4.nxv2f64(, , , , , , , )