Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2908,6 +2908,20 @@ def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic; def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic; + // + // Multi-vector min/max + // + + foreach ty = ["f", "s", "u"] in { + foreach instr = ["max", "min"] in { + def int_aarch64_sve_ # ty # instr # _single_x2 : SME2_VG2_Multi_Single_Intrinsic; + def int_aarch64_sve_ # ty # instr # _single_x4 : SME2_VG4_Multi_Single_Intrinsic; + + def int_aarch64_sve_ # ty # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic; + def int_aarch64_sve_ # ty # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic; + } + } + // // Multi-vector vertical dot-products // Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4863,6 +4863,174 @@ AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) SelectWhilePair(Node, Op); return; + case Intrinsic::aarch64_sve_smax_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H, + AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_umax_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H, + AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_fmax_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S, + AArch64::FMAX_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_smax_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H, + AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_umax_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H, + AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_fmax_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S, + AArch64::FMAX_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_smin_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H, + AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_umin_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H, + AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_fmin_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S, + 
AArch64::FMIN_VG2_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; + case Intrinsic::aarch64_sve_smin_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H, + AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_umin_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H, + AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_fmin_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S, + AArch64::FMIN_VG4_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_smax_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H, + AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_umax_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H, + AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_fmax_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S, + AArch64::FMAX_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_smax_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H, + AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_umax_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H, + AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_fmax_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S, + AArch64::FMAX_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_smin_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H, + AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_umin_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H, + AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_fmin_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S, + AArch64::FMIN_VG2_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; + case Intrinsic::aarch64_sve_smin_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H, + AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D})) + 
SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_umin_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H, + AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_fmin_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S, + AArch64::FMIN_VG4_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; case Intrinsic::aarch64_sve_fcvts_x2: SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); return; Index: llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll @@ -0,0 +1,744 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; SMAX (Single, x2) + +define { , } @multi_vec_max_single_x2_s8( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smax { z0.b, z1.b }, { z0.b, z1.b }, z2.b +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_s16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smax { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_s32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smax { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_s64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smax { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; UMAX (Single, x2) + +define { , } @multi_vec_max_single_x2_u8( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umax { z0.b, z1.b }, { z0.b, z1.b }, z2.b +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_u16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umax { 
z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_u32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umax { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_u64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_u64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umax { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; FMAX (Single, x2) + +define { , } @multi_vec_max_single_x2_f16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_f32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmax { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_max_single_x2_f64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_max_single_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmax { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; SMAX (Single, x4) + +define { , , , } +@multi_vec_max_single_x4_s8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smax { z0.b - z3.b }, { z0.b - z3.b }, z4.b +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_s16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 
def $z0_z1_z2_z3 +; CHECK-NEXT: smax { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_s32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smax { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_s64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smax { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; UMAX (Single, x4) + +define { , , , } +@multi_vec_max_single_x4_u8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umax { z0.b - z3.b }, { z0.b - z3.b }, z4.b +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_u16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umax { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_u32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umax { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.single.x4.nxv4i32( 
%zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_u64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_u64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umax { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; FMAX (SINGLE, x4) + +define { , , , } +@multi_vec_max_single_x4_f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmax.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmax { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmax.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_single_x4_f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_max_single_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmax { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmax.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; SMAX (Multi, x2) + +define { , } @multi_vec_max_multi_x2_s8( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: smax { z0.b, z1.b }, { z0.b, z1.b }, { z2.b, z3.b } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_s16( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: 
def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: smax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_s32( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: smax { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_s64( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: smax { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +; UMAX (Multi, x2) + +define { , } @multi_vec_max_multi_x2_u8( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: umax { z0.b, z1.b }, { z0.b, z1.b }, { z2.b, z3.b } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_u16( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: umax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_u32( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: umax { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_u64( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_u64: +; 
CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: umax { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +; FMAX (Multi, x2) + +define { , } @multi_vec_max_multi_x2_f16( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_f32( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fmax { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , } @multi_vec_max_multi_x2_f64( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_max_multi_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fmax { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +; SMAX (Multi, x4) + +define { , , , } +@multi_vec_max_multi_x4_s8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: smax { z0.b - z3.b }, { z0.b - z3.b }, { z4.b - z7.b } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_s16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: smax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_s32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: smax { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_s64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: smax { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smax.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +; UMAX (Multi, x4) + +define { , , , } +@multi_vec_max_multi_x4_u8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def 
$z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: umax { z0.b - z3.b }, { z0.b - z3.b }, { z4.b - z7.b } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_u16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: umax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_u32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: umax { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_u64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_u64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: umax { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umax.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +; FMAX (Multi, x4) + +define { , , , } 
+@multi_vec_max_multi_x4_f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmax.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fmax { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmax.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , , , } +@multi_vec_max_multi_x4_f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_max_multi_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fmax { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmax.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, + %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +declare { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8(, , ) +declare { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16(, , ) +declare { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32(, , ) +declare { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64(, , ) + +declare { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8(, , ) +declare { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16(, , ) +declare { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32(, , ) +declare { , } 
@llvm.aarch64.sve.umax.single.x2.nxv2i64(, , ) + +declare { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16(, , ) +declare { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32(, , ) +declare { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64(, , ) + +declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8(, , , , ) +declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16(, , , , ) +declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32(, , , , ) +declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64(, , , , ) + +declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8(, , , , ) +declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16(, , , , ) +declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32(, , , , ) +declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64(, , , , ) + +declare { , , , } + @llvm.aarch64.sve.fmax.single.x4.nxv8f16(, , , , ) +declare { , , , } + @llvm.aarch64.sve.fmax.single.x4.nxv4f32(, , , , ) +declare { , , , } + @llvm.aarch64.sve.fmax.single.x4.nxv2f64(, , , , ) + +declare { , } @llvm.aarch64.sve.smax.x2.nxv16i8(, , , ) +declare { , } @llvm.aarch64.sve.smax.x2.nxv8i16(, , , ) +declare { , } @llvm.aarch64.sve.smax.x2.nxv4i32(, , , ) +declare { , } @llvm.aarch64.sve.smax.x2.nxv2i64(, , , ) + +declare { , } @llvm.aarch64.sve.umax.x2.nxv16i8(, , , ) +declare { , } @llvm.aarch64.sve.umax.x2.nxv8i16(, , , ) +declare { , } @llvm.aarch64.sve.umax.x2.nxv4i32(, , , ) +declare { , } @llvm.aarch64.sve.umax.x2.nxv2i64(, , , ) + +declare { , } @llvm.aarch64.sve.fmax.x2.nxv8f16(, , , ) +declare { , } @llvm.aarch64.sve.fmax.x2.nxv4f32(, , , ) +declare { , } @llvm.aarch64.sve.fmax.x2.nxv2f64(, , , ) + +declare { , , , } + @llvm.aarch64.sve.smax.x4.nxv16i8(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.smax.x4.nxv8i16(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.smax.x4.nxv4i32(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.smax.x4.nxv2i64(, , , , , , , ) + +declare { , , , } + @llvm.aarch64.sve.umax.x4.nxv16i8(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.umax.x4.nxv8i16(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.umax.x4.nxv4i32(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.umax.x4.nxv2i64(, , , , , , , ) + +declare { , , , } + @llvm.aarch64.sve.fmax.x4.nxv8f16(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.fmax.x4.nxv4f32(, , , , , , , ) +declare { , , , } + @llvm.aarch64.sve.fmax.x4.nxv2f64(, , , , , , , ) Index: llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll @@ -0,0 +1,744 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+b16 -verify-machineinstrs < %s | FileCheck %s + +; SMIN (Single, x2) + +define { , } @multi_vec_min_single_x2_s8( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smin { z0.b, z1.b }, { z0.b, z1.b }, z2.b +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_s16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smin { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; 
CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_s32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smin { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_s64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smin { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; UMIN (Single, x2) + +define { , } @multi_vec_min_single_x2_u8( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umin { z0.b, z1.b }, { z0.b, z1.b }, z2.b +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_u16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umin { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_u32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umin { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_u64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_u64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umin { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; FMIN (Single, x2) + +define { , } @multi_vec_min_single_x2_f16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_f32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; 
CHECK-NEXT: fmin { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , } @multi_vec_min_single_x2_f64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_min_single_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmin { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +; SMIN (Single, x4) + +define { , , , } +@multi_vec_min_single_x4_s8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smin { z0.b - z3.b }, { z0.b - z3.b }, z4.b +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smin.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_s16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smin { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smin.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_s32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_s32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smin { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smin.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_s64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_s64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smin { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.smin.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; UMIN (Single, x4) + +define { , , , } +@multi_vec_min_single_x4_u8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umin { z0.b - z3.b }, { z0.b - z3.b }, z4.b +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umin.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_u16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umin { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umin.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_u32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_u32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umin { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umin.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_u64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_u64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umin { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.umin.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +; FMIN (SINGLE, x4) + +define { , , , } +@multi_vec_min_single_x4_f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } + @llvm.aarch64.sve.fmin.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , , , } +@multi_vec_min_single_x4_f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_min_single_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    fmin { z0.s - z3.s }, { z0.s - z3.s }, z4.s
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
+              @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
+@multi_vec_min_single_x4_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
+; CHECK-LABEL: multi_vec_min_single_x4_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    fmin { z0.d - z3.d }, { z0.d - z3.d }, z4.d
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
+              @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
+  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+; SMIN (Multi, x2)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_s8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    smin { z0.b, z1.b }, { z0.b, z1.b }, { z2.b, z3.b }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_s16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    smin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_s32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    smin { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_s64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    smin { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; UMIN (Multi, x2)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x2_u8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    umin { z0.b, z1.b }, { z0.b, z1.b }, { z2.b, z3.b }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x2_u16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    umin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x2_u32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    umin { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_u64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    umin { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; FMIN (Multi, x2)
+
+define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x2_f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    fmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
+  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x2_f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    fmin { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
+  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
+; CHECK-LABEL: multi_vec_min_multi_x2_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    fmin { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
+  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+; SMIN (Multi, x4)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+@multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    smin { z0.b - z3.b }, { z0.b - z3.b }, { z4.b - z7.b }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+              @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
+                                                <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+@multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    smin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+              @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
+                                                <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+@multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    smin { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+              @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
+                                                <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+@multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_s64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    smin { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+              @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
+                                                <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; UMIN (Multi, x4)
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+@multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    umin { z0.b - z3.b }, { z0.b - z3.b }, { z4.b - z7.b }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+              @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
+                                                <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+@multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    umin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+              @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
+                                                <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+@multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    umin { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+              @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
+                                                <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+@multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_u64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    umin { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+              @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
+                                                <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+; FMIN (Multi, x4)
+
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
+@multi_vec_min_multi_x4_f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    fmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
+              @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
+                                                <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
+  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
+}
+
+define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
+@multi_vec_min_multi_x4_f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    fmin { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
+              @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
+                                                <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
+  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
+}
+
+define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
+@multi_vec_min_multi_x4_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
+; CHECK-LABEL: multi_vec_min_multi_x4_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    fmin { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
+; CHECK-NEXT:    ret
+  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
+              @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
+                                                <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
+  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
+}
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
+ @llvm.aarch64.sve.fmin.single.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
+ @llvm.aarch64.sve.fmin.single.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
+ @llvm.aarch64.sve.fmin.single.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.smin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.smin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.smin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.umin.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umin.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmin.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+ @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+ @llvm.aarch64.sve.smin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+ @llvm.aarch64.sve.smin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+ @llvm.aarch64.sve.smin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+ @llvm.aarch64.sve.umin.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+ @llvm.aarch64.sve.umin.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+ @llvm.aarch64.sve.umin.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+ @llvm.aarch64.sve.umin.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
+ @llvm.aarch64.sve.fmin.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
+ @llvm.aarch64.sve.fmin.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
+ @llvm.aarch64.sve.fmin.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)