diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13641,6 +13641,8 @@
 ``poison`` otherwise.
 
 
+.. _int_smax:
+
 '``llvm.smax.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -13670,6 +13672,8 @@
 type must match the argument type.
 
 
+.. _int_smin:
+
 '``llvm.smin.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -13699,6 +13703,8 @@
 type must match the argument type.
 
 
+.. _int_umax:
+
 '``llvm.umax.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -13728,6 +13734,8 @@
 type must match the argument type.
 
 
+.. _int_umin:
+
 '``llvm.umin.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -18868,6 +18876,198 @@
 %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
 
 
+.. _int_vp_smax:
+
+'``llvm.vp.smax.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.smax.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.smax.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.smax.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated integer signed maximum of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.smax``' intrinsic performs integer signed maximum (:ref:`smax <int_smax>`)
+of the first and second vector operand on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
+.. _int_vp_smin:
+
+'``llvm.vp.smin.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.smin.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.smin.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.smin.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated integer signed minimum of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.smin``' intrinsic performs integer signed minimum (:ref:`smin <int_smin>`)
+of the first and second vector operand on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
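+As a usage sketch (illustrative only, not part of the intrinsic
+definition): because both calls share the same mask and explicit vector
+length, a predicated signed clamp composes from ``vp.smax`` and
+``vp.smin``. Here ``%x``, ``%lo`` and ``%hi`` are placeholder vector
+values, with ``%lo <= %hi`` assumed in every lane:
+
+.. code-block:: llvm
+
+      ; clamp each enabled lane of %x into [%lo, %hi]; disabled lanes are poison
+      %low = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %x, <4 x i32> %lo, <4 x i1> %mask, i32 %evl)
+      %clamped = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %low, <4 x i32> %hi, <4 x i1> %mask, i32 %evl)
+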
+.. _int_vp_umax:
+
+'``llvm.vp.umax.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.umax.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.umax.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.umax.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated integer unsigned maximum of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.umax``' intrinsic performs integer unsigned maximum (:ref:`umax <int_umax>`)
+of the first and second vector operand on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
+
+.. _int_vp_umin:
+
+'``llvm.vp.umin.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x i32> @llvm.vp.umin.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x i32> @llvm.vp.umin.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x i64> @llvm.vp.umin.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated integer unsigned minimum of two vectors of integers.
+
+
+Arguments:
+""""""""""
+
+The first two operands and the result have the same vector of integer type. The
+third operand is the vector mask and has the same number of elements as the
+result vector type. The fourth operand is the explicit vector length of the
+operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.umin``' intrinsic performs integer unsigned minimum (:ref:`umin <int_umin>`)
+of the first and second vector operand on each enabled lane. The result on
+disabled lanes is a :ref:`poison value <poisonvalues>`.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
+      %also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> poison
+
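+A sketch of one common use (again illustrative only): capping every
+enabled lane at an unsigned bound, for example before a narrowing
+store, is a single ``vp.umin`` against a splat. The splat idiom below
+mirrors the fixed-vector tests in this patch; ``%x``, ``%mask`` and
+``%evl`` are placeholder values:
+
+.. code-block:: llvm
+
+      ; cap each enabled lane of %x at 255; disabled lanes are poison
+      %cap.head = insertelement <4 x i32> poison, i32 255, i32 0
+      %cap = shufflevector <4 x i32> %cap.head, <4 x i32> poison, <4 x i32> zeroinitializer
+      %r = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %x, <4 x i32> %cap, <4 x i1> %mask, i32 %evl)
+

.. 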
_int_vp_copysign: '``llvm.vp.copysign.*``' Intrinsics diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1530,6 +1530,26 @@ LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_smin : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_smax : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_umin : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_umax : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; // Floating-point arithmetic def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -196,6 +196,21 @@ #undef HELPER_REGISTER_BINARY_INT_VP +// llvm.vp.smin(x,y,mask,vlen) +BEGIN_REGISTER_VP(vp_smin, 2, 3, VP_SMIN, -1) +END_REGISTER_VP(vp_smin, VP_SMIN) + +// llvm.vp.smax(x,y,mask,vlen) +BEGIN_REGISTER_VP(vp_smax, 2, 3, VP_SMAX, -1) +END_REGISTER_VP(vp_smax, VP_SMAX) + +// llvm.vp.umin(x,y,mask,vlen) +BEGIN_REGISTER_VP(vp_umin, 2, 3, VP_UMIN, -1) +END_REGISTER_VP(vp_umin, VP_UMIN) + +// llvm.vp.umax(x,y,mask,vlen) +BEGIN_REGISTER_VP(vp_umax, 2, 3, VP_UMAX, -1) +END_REGISTER_VP(vp_umax, VP_UMAX) ///// } Integer Arithmetic ///// Floating-Point Arithmetic { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -165,11 +165,13 @@ case ISD::VP_SUB: case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::VP_SMIN: case ISD::VP_SMAX: case ISD::SDIV: case ISD::SREM: case ISD::VP_SDIV: case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::VP_UMIN: case ISD::VP_UMAX: case ISD::UDIV: case ISD::UREM: case ISD::VP_UDIV: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1092,10 +1092,10 @@ case ISD::UREM: case ISD::VP_UREM: case ISD::SREM: case ISD::VP_SREM: case ISD::FREM: case ISD::VP_FREM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: + case ISD::SMIN: case ISD::VP_SMIN: + case ISD::SMAX: case ISD::VP_SMAX: + case ISD::UMIN: case ISD::VP_UMIN: + case ISD::UMAX: case ISD::VP_UMAX: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -3933,10 +3933,10 @@ case ISD::FMAXNUM: case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: + case ISD::SMIN: case ISD::VP_SMIN: + case ISD::SMAX: case ISD::VP_SMAX: + case ISD::UMIN: case ISD::VP_UMIN: + case ISD::UMAX: case ISD::VP_UMAX: case ISD::UADDSAT: case ISD::SADDSAT: case ISD::USUBSAT: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -454,7 +454,8 @@ ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND, - ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE}; + ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, + ISD::VP_SMIN, ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX }; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, @@ -3975,6 +3976,14 @@ if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1) return lowerVPSetCCMaskOp(Op, DAG); return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true); + case ISD::VP_SMIN: + return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true); + case ISD::VP_SMAX: + return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true); + case ISD::VP_UMIN: + return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true); + case ISD::VP_UMAX: + return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true); case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: return lowerVPStridedLoad(Op, DAG); case ISD::EXPERIMENTAL_VP_STRIDED_STORE: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -0,0 +1,1203 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.smax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.smax.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.smax.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vmax_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i8: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.smax.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vmax_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + 
ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.smax.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vmax_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.smax.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vmax_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> 
@vmax_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.smax.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vmax_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.smax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v25, (a1) +; CHECK-NEXT: addi a4, a2, -128 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: bltu a2, a4, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB22_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu 
+; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vmax.vx v16, v16, a0 +; CHECK-NEXT: bltu a1, a2, .LBB23_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB23_4: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vmax_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { +; CHECK-LABEL: vmax_vx_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. 
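+; (With a constant %evl of 128, type legalization splits the <256 x i8>
+; operation in half and the high half is left with an EVL of 0, as the
+; "vsetivli zero, 0" below shows, so the second vmax.vx does no useful
+; work.) 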
+ +define <256 x i8> @vmax_vx_v258i8_evl128(<256 x i8> %va, i8 %b, <256 x i1> %m) { +; CHECK-LABEL: vmax_vx_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.smax.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vmax_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.smax.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vmax_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x 
i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.smax.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vmax_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.smax.v16i16(<16 x 
i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vmax_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.smax.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vmax_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> 
%vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vmax_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.smax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vmax_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> 
poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.smax.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vmax_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x 
i32> %v +} + +declare <2 x i64> @llvm.vp.smax.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vmax_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.smax.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vmax_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i64_unmasked: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.smax.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vmax_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: 
sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.smax.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vmax_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> 
%elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.smax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: bltu a0, a2, .LBB74_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vmax.vv v16, v16, v24, v0.t +; RV32-NEXT: bltu a0, a1, .LBB74_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB74_4: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmax.vv v8, v8, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: bltu a0, a1, .LBB74_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vmax.vx v16, v16, a1, v0.t +; RV64-NEXT: bltu a0, a2, .LBB74_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB74_4: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmax.vx v8, v8, a1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vmax_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vmax_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 0 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, 
+; RV32-NEXT: bltu a0, a2, .LBB75_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB75_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: vmax.vv v16, v16, v24
+; RV32-NEXT: bltu a0, a1, .LBB75_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: .LBB75_4:
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmax.vv v8, v8, v24
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmax_vi_v32i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: bltu a0, a1, .LBB75_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: .LBB75_2:
+; RV64-NEXT: li a1, -1
+; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vmax.vx v16, v16, a1
+; RV64-NEXT: bltu a0, a2, .LBB75_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: li a0, 16
+; RV64-NEXT: .LBB75_4:
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vmax.vx v8, v8, a1
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
@@ -0,0 +1,1202 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.umax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vmaxu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 127
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i7> @llvm.vp.umax.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.umax.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vmaxu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vmaxu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vmaxu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vmaxu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.umax.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vmaxu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vmaxu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vmaxu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vmaxu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vmaxu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.umax.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vmaxu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vmaxu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vmaxu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vmaxu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.umax.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vmaxu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vmaxu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vmaxu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vmaxu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.umax.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vmaxu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vmaxu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vmaxu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vmaxu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.umax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v258i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a3, 128
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v25, (a1)
+; CHECK-NEXT: addi a4, a2, -128
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bltu a2, a4, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a4
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a2, a3, .LBB22_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.umax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+define <256 x i8> @vmaxu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v258i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a1, -128
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vmaxu.vx v16, v16, a0
+; CHECK-NEXT: bltu a1, a2, .LBB23_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB23_4:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %head = insertelement <256 x i1> poison, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.umax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vmaxu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) {
+; CHECK-LABEL: vmaxu_vx_v258i8_evl129:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vlm.v v24, (a1)
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.umax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vmaxu_vx_v258i8_evl128(<256 x i8> %va, i8 %b, <256 x i1> %m) {
+; CHECK-LABEL: vmaxu_vx_v258i8_evl128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vlm.v v24, (a1)
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.umax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.umax.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vmaxu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vmaxu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vmaxu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vmaxu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.umax.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vmaxu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vmaxu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vmaxu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vmaxu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.umax.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vmaxu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vmaxu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vmaxu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vmaxu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.umax.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vmaxu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vmaxu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vmaxu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vmaxu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.umax.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vmaxu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vmaxu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vmaxu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vmaxu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.umax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vmaxu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vmaxu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vmaxu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vmaxu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.umax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vmaxu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vmaxu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vmaxu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vmaxu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.umax.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vmaxu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vmaxu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vmaxu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vmaxu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.umax.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vmaxu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vmaxu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vmaxu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; RV32-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vmaxu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vmaxu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmaxu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.umax.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vmaxu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vmaxu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vmaxu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; RV32-NEXT: vmaxu.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vmaxu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vmaxu.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmaxu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.umax.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmaxu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmaxu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmaxu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; RV32-NEXT: vmaxu.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vmaxu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vmaxu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vmaxu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.umax.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vmaxu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vmaxu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vmaxu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT: vmaxu.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vmaxu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vmaxu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vmaxu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.umax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vx_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v1, v0
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v0, v0, 2
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: addi a2, a0, -16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: bltu a0, a2, .LBB74_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB74_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: vmaxu.vv v16, v16, v24, v0.t
+; RV32-NEXT: bltu a0, a1, .LBB74_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: li a0, 16
+; RV32-NEXT: .LBB74_4:
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: vslidedown.vi v0, v0, 2
+; RV64-NEXT: bltu a0, a1, .LBB74_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: .LBB74_2:
+; RV64-NEXT: li a1, -1
+; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vmaxu.vx v16, v16, a1, v0.t
+; RV64-NEXT: bltu a0, a2, .LBB74_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: li a0, 16
+; RV64-NEXT: .LBB74_4:
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vmaxu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmaxu_vi_v32i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: bltu a0, a2, .LBB75_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB75_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vmaxu.vv v16, v16, v24 +; RV32-NEXT: bltu a0, a1, .LBB75_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB75_4: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: bltu a0, a1, .LBB75_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB75_2: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vmaxu.vx v16, v16, a1 +; RV64-NEXT: bltu a0, a2, .LBB75_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB75_4: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a1 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -0,0 +1,1203 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.smin.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.smin.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.smin.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vmax_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> 
@llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.smin.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vmax_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer 
+ %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.smin.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vmax_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.smin.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vmax_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = 
shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.smin.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vmax_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.smin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v25, (a1) +; CHECK-NEXT: addi a4, a2, -128 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: bltu a2, a4, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v 
v0, v25 +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB22_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vmin.vx v16, v16, a0 +; CHECK-NEXT: bltu a1, a2, .LBB23_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB23_4: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vmax_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { +; CHECK-LABEL: vmax_vx_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. 
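+; (Splitting leaves two <128 x i8> halves: with %evl == 129, as above, the
+; upper half runs with an AVL of 129 - 128 = 1, but with %evl == 128, as
+; below, the upper half is given an AVL of 0, so its masked vmin.vx
+; computes no lanes and could be dropped entirely.)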
+ +define <256 x i8> @vmax_vx_v258i8_evl128(<256 x i8> %va, i8 %b, <256 x i1> %m) { +; CHECK-LABEL: vmax_vx_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.smin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.smin.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vmax_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.smin.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vmax_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x 
i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.smin.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vmax_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.smin.v16i16(<16 x 
i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vmax_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.smin.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vmax_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> 
%vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.smin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vmax_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.smin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vmax_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> 
poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.smin.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vmax_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x 
i32> %v +} + +declare <2 x i64> @llvm.vp.smin.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vmax_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.smin.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vmax_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i64_unmasked: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.smin.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vmax_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: 
sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.smin.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vmax_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> 
%elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.smin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: bltu a0, a2, .LBB74_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vmin.vv v16, v16, v24, v0.t +; RV32-NEXT: bltu a0, a1, .LBB74_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB74_4: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmin.vv v8, v8, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: bltu a0, a1, .LBB74_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vmin.vx v16, v16, a1, v0.t +; RV64-NEXT: bltu a0, a2, .LBB74_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB74_4: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmin.vx v8, v8, a1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vmax_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vmax_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 0 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, 
-1 +; RV32-NEXT: bltu a0, a2, .LBB75_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB75_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vmin.vv v16, v16, v24 +; RV32-NEXT: bltu a0, a1, .LBB75_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB75_4: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: bltu a0, a1, .LBB75_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB75_2: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vmin.vx v16, v16, a1 +; RV64-NEXT: bltu a0, a2, .LBB75_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB75_4: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a1 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -0,0 +1,1202 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.umin.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.umin.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.umin.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vmax_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> 
@vmax_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vmax_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.umin.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vmax_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vmax_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> 
poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.umin.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vmax_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vmax_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.umin.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vmax_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x 
i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vmax_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.umin.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vmax_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vmax_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.umin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v25, (a1) +; CHECK-NEXT: addi a4, a2, -128 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: bltu a2, a4, .LBB22_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB22_4 +; 
CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: .LBB22_4: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.umin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, a1, -128 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: bltu a1, a2, .LBB23_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vminu.vx v16, v16, a0 +; CHECK-NEXT: bltu a1, a2, .LBB23_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB23_4: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.umin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vmax_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) { +; CHECK-LABEL: vmax_vx_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.umin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. 
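+; (Same situation as in the vmin file: with %evl == 128 the upper
+; <128 x i8> half is given an AVL of 0, so the trailing masked vminu.vx
+; computes no lanes and could be dropped entirely.)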
+ +define <256 x i8> @vmax_vx_v258i8_evl128(<256 x i8> %va, i8 %b, <256 x i1> %m) { +; CHECK-LABEL: vmax_vx_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.umin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.umin.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vmax_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vmax_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.umin.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vmax_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = 
insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vmax_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.umin.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vmax_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vmax_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> 
@llvm.vp.umin.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vmax_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vmax_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.umin.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vmax_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> 
@llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vmax_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.umin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vmax_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vmax_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.umin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vmax_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 
+; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vmax_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.umin.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vmax_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vmax_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> 
%va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.umin.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vmax_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vmax_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.umin.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vmax_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, 
i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vmax_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.umin.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vmax_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v8i64: +; RV32: # %bb.0: +; 
RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vmax_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.umin.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vmax_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = 
insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.umin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: li a1, 0 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: bltu a0, a2, .LBB74_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB74_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vminu.vv v16, v16, v24, v0.t +; RV32-NEXT: bltu a0, a1, .LBB74_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB74_4: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vminu.vv v8, v8, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vmv1r.v v24, v0 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: bltu a0, a1, .LBB74_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB74_2: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vminu.vx v16, v16, a1, v0.t +; RV64-NEXT: bltu a0, a2, .LBB74_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB74_4: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vminu.vx v8, v8, a1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vmax_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vmax_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 0 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli 
zero, a2, e32, m8, ta, ma +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: bltu a0, a2, .LBB75_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB75_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: li a1, 16 +; RV32-NEXT: vminu.vv v16, v16, v24 +; RV32-NEXT: bltu a0, a1, .LBB75_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB75_4: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: bltu a0, a1, .LBB75_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: .LBB75_2: +; RV64-NEXT: li a1, -1 +; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vminu.vx v16, v16, a1 +; RV64-NEXT: bltu a0, a2, .LBB75_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB75_4: +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a1 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -0,0 +1,1496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <vscale x 8 x i7> @llvm.vp.smax.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i7> @vmax_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0 + %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i7> @llvm.vp.smax.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl) + ret <vscale x 8 x i7> %v +} + +declare <vscale x 1 x i8> @llvm.vp.smax.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i8> @vmax_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i8> @llvm.vp.smax.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vmax_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.smax.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vmax_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; 
CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.smax.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vmax_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.smax.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vmax_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.smax.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +declare <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i8> @vmax_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vmax_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vmax_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vmax_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.smax.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +declare <vscale x 3 x i8> @llvm.vp.smax.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32) + +define <vscale x 3 x i8> @vmax_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 3 x i8> @llvm.vp.smax.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vmax_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.smax.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vmax_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 
zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.smax.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vmax_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.smax.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +declare <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i8> @vmax_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vmax_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vmax_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vmax_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.smax.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +declare <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i8> @vmax_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vmax_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vmax_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> 
@llvm.vp.smax.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vmax_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.smax.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +declare <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i8> @vmax_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vmax_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vmax_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vmax_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.smax.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +declare <vscale x 32 x i8> @llvm.vp.smax.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i8> @vmax_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i8> @llvm.vp.smax.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vmax_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.smax.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vmax_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.smax.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vmax_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; 
CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.smax.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +declare <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i8> @vmax_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vmax_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vmax_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vmax_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +; Test that split-legalization works when the mask itself needs splitting.
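+; nxv128i8 is twice the widest legal i8 type (nxv64i8 at LMUL=8), so both the data vectors and the nxv128i1 mask are halved; the low mask half arrives in v0 and the high half is reloaded with vlm.v in the checks below. A hedged sketch of the expected split (the .lo/.hi names are illustrative, not part of this patch): +; %lo = call <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8> %va.lo, <vscale x 64 x i8> %vb.lo, <vscale x 64 x i1> %m.lo, i32 %evl.lo) +; %hi = call <vscale x 64 x i8> @llvm.vp.smax.nxv64i8(<vscale x 64 x i8> %va.hi, <vscale x 64 x i8> %vb.hi, <vscale x 64 x i1> %m.hi, i32 %evl.hi) +; with roughly %evl.lo = umin(%evl, vlmax) and %evl.hi = %evl - %evl.lo, matching the csrr/sub sequences in the checks.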
+ +declare <vscale x 128 x i8> @llvm.vp.smax.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32) + +define <vscale x 128 x i8> @vmax_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu +; CHECK-NEXT: sub a1, a2, a3 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: .LBB34_4: +; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.smax.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +define <vscale x 128 x i8> @vmax_vx_nxv128i8_unmasked(<vscale x 128 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: bltu a1, a2, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: bltu a1, a2, .LBB35_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: .LBB35_4: +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.smax.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +declare <vscale x 1 x i16> @llvm.vp.smax.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i16> @vmax_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i16> @llvm.vp.smax.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vmax_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.smax.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vmax_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.smax.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vmax_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> 
@llvm.vp.smax.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +declare <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i16> @vmax_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vmax_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vmax_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vmax_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.smax.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +declare <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i16> @vmax_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vmax_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vmax_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vmax_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.smax.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +declare <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i16> @vmax_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, 
ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vmax_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vmax_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vmax_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.smax.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +declare <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i16> @vmax_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vmax_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vmax_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vmax_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.smax.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +declare <vscale x 32 x i16> @llvm.vp.smax.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i16> @vmax_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i16> @llvm.vp.smax.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vmax_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: 
vmax_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.smax.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vmax_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.smax.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vmax_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.smax.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +declare <vscale x 1 x i32> @llvm.vp.smax.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i32> @vmax_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i32> @llvm.vp.smax.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vmax_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.smax.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vmax_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.smax.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vmax_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.smax.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +declare <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i32> @vmax_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i32> @llvm.vp.smax.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vmax_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + 
%v = call @llvm.vp.smax.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smax.nxv4i32(, , , i32) + +define @vmax_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smax.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vv_nxv4i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smax.nxv8i32(, , , i32) + +define @vmax_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smax.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmax.vx 
v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i32 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %v = call @llvm.vp.smax.nxv8i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+define @vmax_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmax_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i32 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %v = call @llvm.vp.smax.nxv8i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+declare @llvm.vp.smax.nxv16i32(, , , i32)
+
+define @vmax_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmax_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call @llvm.vp.smax.nxv16i32( %va, %b, %m, i32 %evl)
+ ret %v
+}
+
+define @vmax_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmax_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %v = call @llvm.vp.smax.nxv16i32( %va, %b, %m, i32 %evl)
+ ret %v
+}
+
+define @vmax_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmax_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i32 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %v = call @llvm.vp.smax.nxv16i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+define @vmax_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmax_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i32 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %v = call @llvm.vp.smax.nxv16i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+; Test that split-legalization works when the mask needs manual splitting.
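+;
+; nxv32i32 is twice the widest legal type here (e32 tops out at m8, i.e.
+; nxv16i32), so the operation is split into two m8 halves, and the nxv32i1
+; mask, which sits in a single mask register, has its upper half extracted
+; by hand. A rough sketch of the expected shape (an illustration only, not
+; an extra assertion; it mirrors the autogenerated assertions below):
+;
+;   vslidedown.vx v0, v0, a5    ; a5 = vlenb/4: slide the upper nxv16i1
+;                               ; mask half down into v0
+;   vmax.vx v16, v16, a0, v0.t  ; upper half, EVL = max(evl - 16*vscale, 0)
+;   vmv1r.v v0, v24             ; restore the saved original mask
+;   vmax.vx v8, v8, a0, v0.t    ; lower half, EVL = min(evl, 16*vscale)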
+ +declare @llvm.vp.smax.nxv32i32(, , , i32) + +define @vmax_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a5, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: bltu a1, a4, .LBB80_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB80_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv32i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv32i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vx_nxv32i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: bltu a1, a2, .LBB81_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB81_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vmax.vx v8, v8, a0 +; CHECK-NEXT: bltu a1, a2, .LBB81_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: .LBB81_4: +; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; CHECK-NEXT: vmax.vx v16, v16, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv32i32( %va, %vb, %m, i32 %evl) + ret %v +} + +; Test splitting when the %evl is a constant (albeit an unknown one). + +declare i32 @llvm.vscale.i32() + +; FIXME: The upper half of the operation is doing nothing. +; FIXME: The branches comparing vscale vs. vscale should be constant-foldable. + +define @vmax_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { +; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a5, a1, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: bltu a1, a4, .LBB82_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB82_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 8 + %v = call @llvm.vp.smax.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +; FIXME: The first vmax.vx should be able to infer that its AVL is equivalent to VLMAX. 
+; FIXME: The upper half of the operation is doing nothing but we don't catch +; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) +; (the "original" %evl is the "and", due to known-bits issues with legalizing +; the i32 %evl to i64) and this isn't detected as 0. +; This could be resolved in the future with more detailed KnownBits analysis +; for ISD::VSCALE. + +define @vmax_vx_nxv32i32_evl_nx16( %va, i32 %b, %m) { +; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 16 + %v = call @llvm.vp.smax.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +declare @llvm.vp.smax.nxv1i64(, , , i32) + +define @vmax_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smax.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector 
%head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smax.nxv2i64(, , , i32) + +define @vmax_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smax.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smax.nxv4i64(, , , i32) + +define @vmax_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smax.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; 
RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmax.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smax.nxv8i64(, , , i32) + +define @vmax_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smax.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmax_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vmax.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmax_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smax.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmax_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmax_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmax.vv v8, v8, 
v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmax_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vmax.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement poison, i64 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %v = call @llvm.vp.smax.nxv8i64( %va, %vb, %m, i32 %evl)
+ ret %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -0,0 +1,1495 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare @llvm.vp.umax.nxv8i7(, , , i32)
+
+define @vmaxu_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_nxv8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 127
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vand.vx v9, v9, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i7 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %v = call @llvm.vp.umax.nxv8i7( %a, %vb, %mask, i32 %evl)
+ ret %v
+}
+
+declare @llvm.vp.umax.nxv1i8(, , , i32)
+
+define @vmaxu_vv_nxv1i8( %va, %b, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call @llvm.vp.umax.nxv1i8( %va, %b, %m, i32 %evl)
+ ret %v
+}
+
+define @vmaxu_vv_nxv1i8_unmasked( %va, %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %v = call @llvm.vp.umax.nxv1i8( %va, %b, %m, i32 %evl)
+ ret %v
+}
+
+define @vmaxu_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i8 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %v = call @llvm.vp.umax.nxv1i8( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+define @vmaxu_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_nxv1i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i8 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %v = call @llvm.vp.umax.nxv1i8( %vb, %va, %m, i32 %evl)
+ ret %v
+}
+
+define @vmaxu_vx_nxv1i8_unmasked( %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i8 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %head = insertelement poison, i1
true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv2i8(, , , i32) + +define @vmaxu_vv_nxv2i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv2i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv3i8(, , , i32) + +define @vmaxu_vv_nxv3i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv3i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv3i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv3i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv3i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv3i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv3i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv3i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv4i8(, , , i32) + +define @vmaxu_vv_nxv4i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i8: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv4i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv8i8(, , , i32) + +define @vmaxu_vv_nxv8i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv8i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv16i8(, , , i32) + +define @vmaxu_vv_nxv16i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv16i8_unmasked( %va, %b, i32 zeroext %evl) { +; 
CHECK-LABEL: vmaxu_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv32i8(, , , i32) + +define @vmaxu_vv_nxv32i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv32i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv64i8(, , , i32) + +define @vmaxu_vv_nxv64i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv64i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, 
zeroinitializer + %v = call @llvm.vp.umax.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +; Test that split-legalization works when the mask itself needs splitting. + +declare @llvm.vp.umax.nxv128i8(, , , i32) + +define @vmaxu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu +; CHECK-NEXT: sub a1, a2, a3 +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: .LBB34_4: +; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv128i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv128i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: bltu a1, a2, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: bltu a1, a2, .LBB35_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: .LBB35_4: +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v16, v16, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv128i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv1i16(, , , i32) + +define @vmaxu_vv_nxv1i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv1i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, 
mf4, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv2i16(, , , i32) + +define @vmaxu_vv_nxv2i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv2i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv4i16(, , , i32) + +define @vmaxu_vv_nxv4i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv4i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i16( %va, %b, %m, i32 %evl) + ret %v 
+} + +define @vmaxu_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv8i16(, , , i32) + +define @vmaxu_vv_nxv8i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv8i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv16i16(, , , i32) + +define @vmaxu_vv_nxv16i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv16i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; 
CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv32i16(, , , i32) + +define @vmaxu_vv_nxv32i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv32i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv1i32(, , , i32) + +define @vmaxu_vv_nxv1i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv1i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define 
@vmaxu_vx_nxv1i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv2i32(, , , i32) + +define @vmaxu_vv_nxv2i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv2i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv4i32(, , , i32) + +define @vmaxu_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv4i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + 
%elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv8i32(, , , i32) + +define @vmaxu_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv16i32(, , , i32) + +define @vmaxu_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmaxu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call 
@llvm.vp.umax.nxv16i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+; Test that split-legalization works when the mask needs manual splitting.
+
+declare @llvm.vp.umax.nxv32i32(, , , i32)
+
+define @vmaxu_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a5, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub a4, a1, a2
+; CHECK-NEXT: vslidedown.vx v0, v0, a5
+; CHECK-NEXT: bltu a1, a4, .LBB80_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: .LBB80_2:
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB80_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB80_4:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i32 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %v = call @llvm.vp.umax.nxv32i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+define @vmaxu_vx_nxv32i32_unmasked( %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vmaxu_vx_nxv32i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: bltu a1, a2, .LBB81_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB81_2:
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vmaxu.vx v8, v8, a0
+; CHECK-NEXT: bltu a1, a2, .LBB81_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: .LBB81_4:
+; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, i32 %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %v = call @llvm.vp.umax.nxv32i32( %va, %vb, %m, i32 %evl)
+ ret %v
+}
+
+; Test splitting when the %evl is a constant (albeit an unknown one).
+
+declare i32 @llvm.vscale.i32()
+
+; FIXME: The upper half of the operation is doing nothing.
+; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
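+;
+; In the test below %evl is vscale * 8, while each legalized nxv16i32 half
+; covers vscale * 16 lanes, so every active lane lands in the low half and
+; the second vmaxu.vx is dead. Proving that would seem to require KnownBits
+; (or similar) to reason about ISD::VSCALE on both sides of the compare,
+; roughly:
+;
+;   %evl0 = mul i32 %evl, 8   ; always < 16 * vscale, the low half's VLMAX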
+ +define @vmaxu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { +; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a5, a1, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: bltu a1, a4, .LBB82_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB82_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 8 + %v = call @llvm.vp.umax.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +; FIXME: The first vmaxu.vx should be able to infer that its AVL is equivalent to VLMAX. +; FIXME: The upper half of the operation is doing nothing but we don't catch +; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) +; (the "original" %evl is the "and", due to known-bits issues with legalizing +; the i32 %evl to i64) and this isn't detected as 0. +; This could be resolved in the future with more detailed KnownBits analysis +; for ISD::VSCALE. + +define @vmaxu_vx_nxv32i32_evl_nx16( %va, i32 %b, %m) { +; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 16 + %v = call @llvm.vp.umax.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +declare @llvm.vp.umax.nxv1i64(, , , i32) + +define @vmaxu_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmaxu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; 
RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv2i64(, , , i32) + +define @vmaxu_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmaxu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = 
insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv4i64(, , , i32) + +define @vmaxu_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmaxu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umax.nxv8i64(, , , i32) + +define @vmaxu_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmaxu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umax.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmaxu_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmaxu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i64( %va, i64 
%b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vmaxu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmaxu_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmaxu_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmaxu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmaxu_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umax.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -0,0 +1,1496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.smin.nxv8i7(, , , i32) + +define @vmin_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i7 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i7( %a, %vb, %mask, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv1i8(, , , i32) + +define @vmin_vv_nxv1i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv1i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv1i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, 
zeroinitializer + %v = call @llvm.vp.smin.nxv1i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i8( %vb, %va, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv2i8(, , , i32) + +define @vmin_vv_nxv2i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv2i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv3i8(, , , i32) + +define @vmin_vv_nxv3i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv3i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv3i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: 
vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv3i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv3i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv3i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv3i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv3i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv4i8(, , , i32) + +define @vmin_vv_nxv4i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv4i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv8i8(, , , i32) + +define @vmin_vv_nxv8i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv8i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vmin_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv16i8(, , , i32) + +define @vmin_vv_nxv16i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv16i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv32i8(, , , i32) + +define @vmin_vv_nxv32i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv32i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call 
@llvm.vp.smin.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv64i8(, , , i32) + +define @vmin_vv_nxv64i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv64i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +; Test that split-legalization works when the mask itself needs splitting. 
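+; For reference, a hypothetical sketch of the split (the .lo/.hi names are
+; illustrative only, not the legalizer's actual output): nxv128i8 is twice
+; the widest legal type (nxv64i8 at SEW=8, LMUL=8), so the operands, the
+; mask and the EVL are all halved, roughly as
+;   %v.lo = call @llvm.vp.smin.nxv64i8(%va.lo, %vb.lo, %m.lo, i32 min(%evl, VLMAX))
+;   %v.hi = call @llvm.vp.smin.nxv64i8(%va.hi, %vb.hi, %m.hi, i32 usubsat(%evl, VLMAX))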
+ +declare @llvm.vp.smin.nxv128i8(, , , i32) + +define @vmin_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu +; CHECK-NEXT: sub a1, a2, a3 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: .LBB34_4: +; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv128i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv128i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: bltu a1, a2, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: bltu a1, a2, .LBB35_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: .LBB35_4: +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv128i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv1i16(, , , i32) + +define @vmin_vv_nxv1i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv1i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call 
@llvm.vp.smin.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv2i16(, , , i32) + +define @vmin_vv_nxv2i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv2i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv4i16(, , , i32) + +define @vmin_vv_nxv4i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv4i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv8i16(, , , i32) + +define @vmin_vv_nxv8i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, 
ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv8i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv16i16(, , , i32) + +define @vmin_vv_nxv16i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv16i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv32i16(, , , i32) + +define @vmin_vv_nxv32i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv32i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: 
vmin_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv1i32(, , , i32) + +define @vmin_vv_nxv1i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv1i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv2i32(, , , i32) + +define @vmin_vv_nxv2i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv2i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + 
%v = call @llvm.vp.smin.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv4i32(, , , i32) + +define @vmin_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv4i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv8i32(, , , i32) + +define @vmin_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmin.vx 
v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv16i32(, , , i32) + +define @vmin_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +; Test that split-legalization works when the mask needs manual splitting.
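+; Our reading of "manual splitting" (inferred from the CHECK lines below):
+; the upper nxv16i1 half of the nxv32i1 mask has to be shifted down into
+; position, since the mask bits are packed into a single register, e.g.
+;   csrr a2, vlenb
+;   srli a5, a2, 2                   ; vlenb/4 bytes == vscale x 16 mask bits
+;   vslidedown.vx v0, v0, a5         ; move the upper mask half into v0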
+ +declare @llvm.vp.smin.nxv32i32(, , , i32) + +define @vmin_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a5, a2, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: bltu a1, a4, .LBB80_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB80_2: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB80_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB80_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv32i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vx_nxv32i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: bltu a1, a2, .LBB81_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB81_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vmin.vx v8, v8, a0 +; CHECK-NEXT: bltu a1, a2, .LBB81_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: .LBB81_4: +; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; CHECK-NEXT: vmin.vx v16, v16, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv32i32( %va, %vb, %m, i32 %evl) + ret %v +} + +; Test splitting when the %evl is a constant (albeit an unknown one). + +declare i32 @llvm.vscale.i32() + +; FIXME: The upper half of the operation is doing nothing. +; FIXME: The branches comparing vscale vs. vscale should be constant-foldable. + +define @vmin_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { +; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a5, a1, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: bltu a1, a4, .LBB82_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB82_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 8 + %v = call @llvm.vp.smin.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +; FIXME: The first vmin.vx should be able to infer that its AVL is equivalent to VLMAX. 
+; FIXME: The upper half of the operation is doing nothing but we don't catch +; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) +; (the "original" %evl is the "and", due to known-bits issues with legalizing +; the i32 %evl to i64) and this isn't detected as 0. +; This could be resolved in the future with more detailed KnownBits analysis +; for ISD::VSCALE. + +define @vmin_vx_nxv32i32_evl_nx16( %va, i32 %b, %m) { +; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 16 + %v = call @llvm.vp.smin.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +declare @llvm.vp.smin.nxv1i64(, , , i32) + +define @vmin_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector 
%head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv2i64(, , , i32) + +define @vmin_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv4i64(, , , i32) + +define @vmin_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; 
RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vmin.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.smin.nxv8i64(, , , i32) + +define @vmin_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.smin.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vmin_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vmin.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmin_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.smin.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vmin_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vmin_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vmin.vv v8, v8, 
v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmin_vx_nxv8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vmin.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.smin.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -0,0 +1,1495 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.umin.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vminu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vx_nxv8i7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 127
+; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vand.vx v8, v8, a2
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vand.vx v9, v9, a2
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT:    vminu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+  %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i7> @llvm.vp.umin.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+  ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.umin.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vminu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vminu.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.umin.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vminu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vv_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT:    vminu.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.umin.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vminu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.umin.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vminu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vx_nxv1i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.umin.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vminu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vx_nxv1i8_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> poison, i1 
true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv2i8(, , , i32) + +define @vminu_vv_nxv2i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv2i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv3i8(, , , i32) + +define @vminu_vv_nxv3i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv3i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv3i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv3i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv3i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv3i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv3i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv3i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv4i8(, , , i32) + +define @vminu_vv_nxv4i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i8: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv4i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv8i8(, , , i32) + +define @vminu_vv_nxv8i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv8i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv16i8(, , , i32) + +define @vminu_vv_nxv16i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv16i8_unmasked( %va, %b, i32 zeroext %evl) { +; 
CHECK-LABEL: vminu_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv16i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv16i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv32i8(, , , i32) + +define @vminu_vv_nxv32i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv32i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv32i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv32i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv32i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv32i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv64i8(, , , i32) + +define @vminu_vv_nxv64i8( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv64i8_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, 
zeroinitializer + %v = call @llvm.vp.umin.nxv64i8( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv64i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv64i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv64i8( %va, %vb, %m, i32 %evl) + ret %v +} + +; Test that split-legalization works when the mask itself needs splitting. + +declare @llvm.vp.umin.nxv128i8(, , , i32) + +define @vminu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: bltu a2, a3, .LBB34_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a4, a3 +; CHECK-NEXT: .LBB34_2: +; CHECK-NEXT: li a5, 0 +; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a1) +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu +; CHECK-NEXT: sub a1, a2, a3 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB34_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: .LBB34_4: +; CHECK-NEXT: vsetvli zero, a5, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv128i8( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv128i8_unmasked( %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: bltu a1, a2, .LBB35_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: sub a2, a1, a2 +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: bltu a1, a2, .LBB35_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a2 +; CHECK-NEXT: .LBB35_4: +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vminu.vx v16, v16, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv128i8( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv1i16(, , , i32) + +define @vminu_vv_nxv1i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv1i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, 
mf4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv1i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv2i16(, , , i32) + +define @vminu_vv_nxv2i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv2i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv4i16(, , , i32) + +define @vminu_vv_nxv4i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv4i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv4i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i16( %va, %b, %m, i32 %evl) + ret %v 
+} + +define @vminu_vx_nxv4i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv8i16(, , , i32) + +define @vminu_vv_nxv8i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv8i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv16i16(, , , i32) + +define @vminu_vv_nxv16i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv16i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv16i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; 
CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv16i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv32i16(, , , i32) + +define @vminu_vv_nxv32i16( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv32i16_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv32i16( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv32i16( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv32i16( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv1i32(, , , i32) + +define @vminu_vv_nxv1i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv1i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv1i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define 
@vminu_vx_nxv1i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv2i32(, , , i32) + +define @vminu_vv_nxv2i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv2i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv4i32(, , , i32) + +define @vminu_vv_nxv4i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv4i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + 
%elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv8i32(, , , i32) + +define @vminu_vv_nxv8i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv8i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i32( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv16i32(, , , i32) + +define @vminu_vv_nxv16i32( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv16i32_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i32( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv16i32( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv16i32( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vminu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call 
@llvm.vp.umin.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works when the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.umin.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vminu_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vx_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a5, a2, 2
+; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub a4, a1, a2
+; CHECK-NEXT:    vslidedown.vx v0, v0, a5
+; CHECK-NEXT:    bltu a1, a4, .LBB80_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a4
+; CHECK-NEXT:  .LBB80_2:
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT:    vminu.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB80_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:  .LBB80_4:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.umin.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vminu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vminu_vx_nxv32i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    bltu a1, a2, .LBB81_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:  .LBB81_2:
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT:    sub a2, a1, a2
+; CHECK-NEXT:    vminu.vx v8, v8, a0
+; CHECK-NEXT:    bltu a1, a2, .LBB81_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a4, a2
+; CHECK-NEXT:  .LBB81_4:
+; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT:    vminu.vx v16, v16, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i32> @llvm.vp.umin.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i32> %v
+}
+
+; Test splitting when the %evl is a constant (albeit an unknown one).
+
+declare i32 @llvm.vscale.i32()
+
+; FIXME: The upper half of the operation is doing nothing.
+; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
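+; Note (editorial sketch of the arithmetic behind the two FIXMEs above, not
+; autogenerated output): <vscale x 32 x i32> is legalized by splitting into two
+; <vscale x 16 x i32> halves of vscale x 16 lanes each. The test below builds
+; its EVL as
+;   %evl = call i32 @llvm.vscale.i32()
+;   %evl0 = mul i32 %evl, 8
+; which is always <= vscale x 16, so the high half's EVL,
+; usubsat(vscale x 8, vscale x 16), is always 0, and each bltu compares two
+; known multiples of the same vscale.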
+ +define @vminu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { +; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a5, a1, 2 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 +; CHECK-NEXT: bltu a1, a4, .LBB82_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB82_2: +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB82_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB82_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 8 + %v = call @llvm.vp.umin.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +; FIXME: The first vminu.vx should be able to infer that its AVL is equivalent to VLMAX. +; FIXME: The upper half of the operation is doing nothing but we don't catch +; that on RV64; we issue a usubsat(and (vscale x 16), 0xffffffff, vscale x 16) +; (the "original" %evl is the "and", due to known-bits issues with legalizing +; the i32 %evl to i64) and this isn't detected as 0. +; This could be resolved in the future with more detailed KnownBits analysis +; for ISD::VSCALE. + +define @vminu_vx_nxv32i32_evl_nx16( %va, i32 %b, %m) { +; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx16: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e32, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %evl = call i32 @llvm.vscale.i32() + %evl0 = mul i32 %evl, 16 + %v = call @llvm.vp.umin.nxv32i32( %va, %vb, %m, i32 %evl0) + ret %v +} + +declare @llvm.vp.umin.nxv1i64(, , , i32) + +define @vminu_vv_nxv1i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv1i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv1i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; 
RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv1i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv1i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv2i64(, , , i32) + +define @vminu_vv_nxv2i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv2i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv2i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = 
insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv2i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv4i64(, , , i32) + +define @vminu_vv_nxv4i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv4i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i64( %va, i64 %b, %m, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv4i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv4i64( %va, %vb, %m, i32 %evl) + ret %v +} + +declare @llvm.vp.umin.nxv8i64(, , , i32) + +define @vminu_vv_nxv8i64( %va, %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vminu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.umin.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vv_nxv8i64_unmasked( %va, %b, i32 zeroext %evl) { +; CHECK-LABEL: vminu_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vminu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i64( %va, %b, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i64( %va, i64 
%b, %m, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vminu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} + +define @vminu_vx_nxv8i64_unmasked( %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vminu_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vminu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vminu_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %head = insertelement poison, i1 true, i32 0 + %m = shufflevector %head, poison, zeroinitializer + %v = call @llvm.vp.umin.nxv8i64( %va, %vb, %m, i32 %evl) + ret %v +} diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -40,7 +40,8 @@ std::unique_ptr createVPDeclarationModule() { const char *BinaryIntOpcodes[] = {"add", "sub", "mul", "sdiv", "srem", "udiv", "urem", "and", "xor", "or", - "ashr", "lshr", "shl"}; + "ashr", "lshr", "shl", "smin", "smax", + "umin", "umax"}; std::stringstream Str; for (const char *BinaryIntOpcode : BinaryIntOpcodes) Str << " declare <8 x i32> @llvm.vp." << BinaryIntOpcode