diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -162,6 +162,10 @@
   FDIV_VL,
   FNEG_VL,
   FMA_VL,
+  SMIN_VL,
+  SMAX_VL,
+  UMIN_VL,
+  UMAX_VL,

   // Set mask vector to all zeros or ones.
   VMCLR_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -547,6 +547,11 @@
       setOperationAction(ISD::SHL, VT, Custom);
       setOperationAction(ISD::SRA, VT, Custom);
       setOperationAction(ISD::SRL, VT, Custom);
+
+      setOperationAction(ISD::SMIN, VT, Custom);
+      setOperationAction(ISD::SMAX, VT, Custom);
+      setOperationAction(ISD::UMIN, VT, Custom);
+      setOperationAction(ISD::UMAX, VT, Custom);
     }

     for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1201,6 +1206,14 @@
     return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
   case ISD::FMA:
     return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
+  case ISD::SMIN:
+    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
+  case ISD::SMAX:
+    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
+  case ISD::UMIN:
+    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
+  case ISD::UMAX:
+    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
   }
 }
@@ -4697,6 +4710,10 @@
   NODE_NAME_CASE(FDIV_VL)
   NODE_NAME_CASE(FNEG_VL)
   NODE_NAME_CASE(FMA_VL)
+  NODE_NAME_CASE(SMIN_VL)
+  NODE_NAME_CASE(SMAX_VL)
+  NODE_NAME_CASE(UMIN_VL)
+  NODE_NAME_CASE(UMAX_VL)
   NODE_NAME_CASE(VMCLR_VL)
   NODE_NAME_CASE(VMSET_VL)
   NODE_NAME_CASE(VRGATHER_VX_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -64,6 +64,10 @@
 def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>;
 def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>;
 def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_smin_vl : SDNode<"RISCVISD::SMIN_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL>;
 def riscv_fadd_vl : SDNode<"RISCVISD::FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
 def riscv_fsub_vl : SDNode<"RISCVISD::FSUB_VL", SDT_RISCVFPBinOp_VL>;
 def riscv_fmul_vl : SDNode<"RISCVISD::FMUL_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
@@ -221,6 +225,12 @@
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;
 defm "" : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL", uimm5>;

+// 12.9. Vector Integer Min/Max Instructions
+defm "" : VPatBinaryVL_VV_VX<riscv_smin_vl, "PseudoVMIN">;
+defm "" : VPatBinaryVL_VV_VX<riscv_smax_vl, "PseudoVMAX">;
+defm "" : VPatBinaryVL_VV_VX<riscv_umin_vl, "PseudoVMINU">;
+defm "" : VPatBinaryVL_VV_VX<riscv_umax_vl, "PseudoVMAXU">;
+
 // 12.10.
Vector Single-Width Integer Multiply Instructions defm "" : VPatBinaryVL_VV_VX; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -888,6 +888,294 @@ ret void } +define void @smin_v16i8(<16 x i8>* %x, <16 x i8>* %y) { +; CHECK-LABEL: smin_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmin.vv v25, v25, v26 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = load <16 x i8>, <16 x i8>* %y + %cc = icmp slt <16 x i8> %a, %b + %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b + store <16 x i8> %c, <16 x i8>* %x + ret void +} + +define void @smin_v8i16(<8 x i16>* %x, <8 x i16>* %y) { +; CHECK-LABEL: smin_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vmin.vv v25, v25, v26 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = load <8 x i16>, <8 x i16>* %y + %cc = icmp slt <8 x i16> %a, %b + %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %c, <8 x i16>* %x + ret void +} + +define void @smin_v4i32(<4 x i32>* %x, <4 x i32>* %y) { +; CHECK-LABEL: smin_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vmin.vv v25, v25, v26 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = load <4 x i32>, <4 x i32>* %y + %cc = icmp slt <4 x i32> %a, %b + %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %c, <4 x i32>* %x + ret void +} + +define void @smin_v2i64(<2 x i64>* %x, <2 x i64>* %y) { +; CHECK-LABEL: smin_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vmin.vv v25, v25, v26 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %b = load <2 x i64>, <2 x i64>* %y + %cc = icmp slt <2 x i64> %a, %b + %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b + store <2 x i64> %c, <2 x i64>* %x + ret void +} + +define void @smax_v16i8(<16 x i8>* %x, <16 x i8>* %y) { +; CHECK-LABEL: smax_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmax.vv v25, v25, v26 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = load <16 x i8>, <16 x i8>* %y + %cc = icmp sgt <16 x i8> %a, %b + %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b + store <16 x i8> %c, <16 x i8>* %x + ret void +} + +define void @smax_v8i16(<8 x i16>* %x, <8 x i16>* %y) { +; CHECK-LABEL: smax_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vmax.vv v25, v25, v26 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = load <8 x i16>, <8 x i16>* %y + %cc = icmp sgt <8 x i16> %a, %b + %c = select <8 x i1> %cc, 
<8 x i16> %a, <8 x i16> %b + store <8 x i16> %c, <8 x i16>* %x + ret void +} + +define void @smax_v4i32(<4 x i32>* %x, <4 x i32>* %y) { +; CHECK-LABEL: smax_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vmax.vv v25, v25, v26 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = load <4 x i32>, <4 x i32>* %y + %cc = icmp sgt <4 x i32> %a, %b + %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %c, <4 x i32>* %x + ret void +} + +define void @smax_v2i64(<2 x i64>* %x, <2 x i64>* %y) { +; CHECK-LABEL: smax_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vmax.vv v25, v25, v26 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %b = load <2 x i64>, <2 x i64>* %y + %cc = icmp sgt <2 x i64> %a, %b + %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b + store <2 x i64> %c, <2 x i64>* %x + ret void +} + +define void @umin_v16i8(<16 x i8>* %x, <16 x i8>* %y) { +; CHECK-LABEL: umin_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vminu.vv v25, v25, v26 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = load <16 x i8>, <16 x i8>* %y + %cc = icmp ult <16 x i8> %a, %b + %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b + store <16 x i8> %c, <16 x i8>* %x + ret void +} + +define void @umin_v8i16(<8 x i16>* %x, <8 x i16>* %y) { +; CHECK-LABEL: umin_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vminu.vv v25, v25, v26 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = load <8 x i16>, <8 x i16>* %y + %cc = icmp ult <8 x i16> %a, %b + %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %c, <8 x i16>* %x + ret void +} + +define void @umin_v4i32(<4 x i32>* %x, <4 x i32>* %y) { +; CHECK-LABEL: umin_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vminu.vv v25, v25, v26 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = load <4 x i32>, <4 x i32>* %y + %cc = icmp ult <4 x i32> %a, %b + %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %c, <4 x i32>* %x + ret void +} + +define void @umin_v2i64(<2 x i64>* %x, <2 x i64>* %y) { +; CHECK-LABEL: umin_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vminu.vv v25, v25, v26 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %b = load <2 x i64>, <2 x i64>* %y + %cc = icmp ult <2 x i64> %a, %b + %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b + store <2 x i64> %c, <2 x i64>* %x + ret void +} + +define void @umax_v16i8(<16 x i8>* %x, <16 x i8>* %y) { +; CHECK-LABEL: umax_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 16 +; CHECK-NEXT: vsetvli a2, a2, 
e8,m1,ta,mu +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v26, (a1) +; CHECK-NEXT: vmaxu.vv v25, v25, v26 +; CHECK-NEXT: vse8.v v25, (a0) +; CHECK-NEXT: ret + %a = load <16 x i8>, <16 x i8>* %x + %b = load <16 x i8>, <16 x i8>* %y + %cc = icmp ugt <16 x i8> %a, %b + %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b + store <16 x i8> %c, <16 x i8>* %x + ret void +} + +define void @umax_v8i16(<8 x i16>* %x, <8 x i16>* %y) { +; CHECK-LABEL: umax_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vmaxu.vv v25, v25, v26 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x i16>, <8 x i16>* %x + %b = load <8 x i16>, <8 x i16>* %y + %cc = icmp ugt <8 x i16> %a, %b + %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %c, <8 x i16>* %x + ret void +} + +define void @umax_v4i32(<4 x i32>* %x, <4 x i32>* %y) { +; CHECK-LABEL: umax_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vmaxu.vv v25, v25, v26 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x i32>, <4 x i32>* %x + %b = load <4 x i32>, <4 x i32>* %y + %cc = icmp ugt <4 x i32> %a, %b + %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %c, <4 x i32>* %x + ret void +} + +define void @umax_v2i64(<2 x i64>* %x, <2 x i64>* %y) { +; CHECK-LABEL: umax_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vmaxu.vv v25, v25, v26 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x i64>, <2 x i64>* %x + %b = load <2 x i64>, <2 x i64>* %y + %cc = icmp ugt <2 x i64> %a, %b + %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b + store <2 x i64> %c, <2 x i64>* %x + ret void +} + define void @add_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: add_v32i8: ; LMULMAX2: # %bb.0: @@ -3489,3 +3777,803 @@ store <4 x i64> %c, <4 x i64>* %x ret void } + +define void @smin_v32i8(<32 x i8>* %x, <32 x i8>* %y) { +; LMULMAX2-LABEL: smin_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 32 +; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; LMULMAX2-NEXT: vle8.v v26, (a0) +; LMULMAX2-NEXT: vle8.v v28, (a1) +; LMULMAX2-NEXT: vmin.vv v26, v26, v28 +; LMULMAX2-NEXT: vse8.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smin_v32i8: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 16 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle8.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle8.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smin_v32i8: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 16 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle8.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle8.v v28, 
(a1) +; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = load <32 x i8>, <32 x i8>* %y + %cc = icmp slt <32 x i8> %a, %b + %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b + store <32 x i8> %c, <32 x i8>* %x + ret void +} + +define void @smin_v16i16(<16 x i16>* %x, <16 x i16>* %y) { +; LMULMAX2-LABEL: smin_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 16 +; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu +; LMULMAX2-NEXT: vle16.v v26, (a0) +; LMULMAX2-NEXT: vle16.v v28, (a1) +; LMULMAX2-NEXT: vmin.vv v26, v26, v28 +; LMULMAX2-NEXT: vse16.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smin_v16i16: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 8 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle16.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smin_v16i16: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 8 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle16.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <16 x i16>, <16 x i16>* %x + %b = load <16 x i16>, <16 x i16>* %y + %cc = icmp slt <16 x i16> %a, %b + %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b + store <16 x i16> %c, <16 x i16>* %x + ret void +} + +define void @smin_v8i32(<8 x i32>* %x, <8 x i32>* %y) { +; LMULMAX2-LABEL: smin_v8i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 8 +; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu +; LMULMAX2-NEXT: vle32.v v26, (a0) +; LMULMAX2-NEXT: vle32.v v28, (a1) +; LMULMAX2-NEXT: vmin.vv v26, v26, v28 +; LMULMAX2-NEXT: vse32.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smin_v8i32: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 4 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle32.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smin_v8i32: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 4 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle32.v v27, (a2) +; LMULMAX1-RV64-NEXT: 
vle32.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %b = load <8 x i32>, <8 x i32>* %y + %cc = icmp slt <8 x i32> %a, %b + %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b + store <8 x i32> %c, <8 x i32>* %x + ret void +} + +define void @smin_v4i64(<4 x i64>* %x, <4 x i64>* %y) { +; LMULMAX2-LABEL: smin_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 4 +; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu +; LMULMAX2-NEXT: vle64.v v26, (a0) +; LMULMAX2-NEXT: vle64.v v28, (a1) +; LMULMAX2-NEXT: vmin.vv v26, v26, v28 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smin_v4i64: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 2 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle64.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmin.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smin_v4i64: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 2 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmin.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmin.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <4 x i64>, <4 x i64>* %x + %b = load <4 x i64>, <4 x i64>* %y + %cc = icmp slt <4 x i64> %a, %b + %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b + store <4 x i64> %c, <4 x i64>* %x + ret void +} + +define void @smax_v32i8(<32 x i8>* %x, <32 x i8>* %y) { +; LMULMAX2-LABEL: smax_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 32 +; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; LMULMAX2-NEXT: vle8.v v26, (a0) +; LMULMAX2-NEXT: vle8.v v28, (a1) +; LMULMAX2-NEXT: vmax.vv v26, v26, v28 +; LMULMAX2-NEXT: vse8.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smax_v32i8: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 16 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle8.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle8.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smax_v32i8: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 16 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle8.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle8.v v28, (a1) 
+; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = load <32 x i8>, <32 x i8>* %y + %cc = icmp sgt <32 x i8> %a, %b + %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b + store <32 x i8> %c, <32 x i8>* %x + ret void +} + +define void @smax_v16i16(<16 x i16>* %x, <16 x i16>* %y) { +; LMULMAX2-LABEL: smax_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 16 +; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu +; LMULMAX2-NEXT: vle16.v v26, (a0) +; LMULMAX2-NEXT: vle16.v v28, (a1) +; LMULMAX2-NEXT: vmax.vv v26, v26, v28 +; LMULMAX2-NEXT: vse16.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smax_v16i16: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 8 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle16.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smax_v16i16: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 8 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle16.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <16 x i16>, <16 x i16>* %x + %b = load <16 x i16>, <16 x i16>* %y + %cc = icmp sgt <16 x i16> %a, %b + %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b + store <16 x i16> %c, <16 x i16>* %x + ret void +} + +define void @smax_v8i32(<8 x i32>* %x, <8 x i32>* %y) { +; LMULMAX2-LABEL: smax_v8i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 8 +; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu +; LMULMAX2-NEXT: vle32.v v26, (a0) +; LMULMAX2-NEXT: vle32.v v28, (a1) +; LMULMAX2-NEXT: vmax.vv v26, v26, v28 +; LMULMAX2-NEXT: vse32.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smax_v8i32: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 4 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle32.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smax_v8i32: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 4 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle32.v v27, (a2) +; LMULMAX1-RV64-NEXT: 
vle32.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %b = load <8 x i32>, <8 x i32>* %y + %cc = icmp sgt <8 x i32> %a, %b + %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b + store <8 x i32> %c, <8 x i32>* %x + ret void +} + +define void @smax_v4i64(<4 x i64>* %x, <4 x i64>* %y) { +; LMULMAX2-LABEL: smax_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 4 +; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu +; LMULMAX2-NEXT: vle64.v v26, (a0) +; LMULMAX2-NEXT: vle64.v v28, (a1) +; LMULMAX2-NEXT: vmax.vv v26, v26, v28 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: smax_v4i64: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 2 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle64.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmax.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: smax_v4i64: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 2 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmax.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmax.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <4 x i64>, <4 x i64>* %x + %b = load <4 x i64>, <4 x i64>* %y + %cc = icmp sgt <4 x i64> %a, %b + %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b + store <4 x i64> %c, <4 x i64>* %x + ret void +} + +define void @umin_v32i8(<32 x i8>* %x, <32 x i8>* %y) { +; LMULMAX2-LABEL: umin_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 32 +; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; LMULMAX2-NEXT: vle8.v v26, (a0) +; LMULMAX2-NEXT: vle8.v v28, (a1) +; LMULMAX2-NEXT: vminu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse8.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umin_v32i8: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 16 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle8.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle8.v v28, (a1) +; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umin_v32i8: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 16 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle8.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle8.v v28, 
(a1) +; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = load <32 x i8>, <32 x i8>* %y + %cc = icmp ult <32 x i8> %a, %b + %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b + store <32 x i8> %c, <32 x i8>* %x + ret void +} + +define void @umin_v16i16(<16 x i16>* %x, <16 x i16>* %y) { +; LMULMAX2-LABEL: umin_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 16 +; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu +; LMULMAX2-NEXT: vle16.v v26, (a0) +; LMULMAX2-NEXT: vle16.v v28, (a1) +; LMULMAX2-NEXT: vminu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse16.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umin_v16i16: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 8 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle16.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umin_v16i16: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 8 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle16.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <16 x i16>, <16 x i16>* %x + %b = load <16 x i16>, <16 x i16>* %y + %cc = icmp ult <16 x i16> %a, %b + %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b + store <16 x i16> %c, <16 x i16>* %x + ret void +} + +define void @umin_v8i32(<8 x i32>* %x, <8 x i32>* %y) { +; LMULMAX2-LABEL: umin_v8i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 8 +; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu +; LMULMAX2-NEXT: vle32.v v26, (a0) +; LMULMAX2-NEXT: vle32.v v28, (a1) +; LMULMAX2-NEXT: vminu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse32.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umin_v8i32: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 4 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle32.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umin_v8i32: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 4 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle32.v v27, (a2) +; 
LMULMAX1-RV64-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %b = load <8 x i32>, <8 x i32>* %y + %cc = icmp ult <8 x i32> %a, %b + %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b + store <8 x i32> %c, <8 x i32>* %x + ret void +} + +define void @umin_v4i64(<4 x i64>* %x, <4 x i64>* %y) { +; LMULMAX2-LABEL: umin_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 4 +; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu +; LMULMAX2-NEXT: vle64.v v26, (a0) +; LMULMAX2-NEXT: vle64.v v28, (a1) +; LMULMAX2-NEXT: vminu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umin_v4i64: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 2 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle64.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV32-NEXT: vminu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umin_v4i64: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 2 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV64-NEXT: vminu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vminu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <4 x i64>, <4 x i64>* %x + %b = load <4 x i64>, <4 x i64>* %y + %cc = icmp ult <4 x i64> %a, %b + %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b + store <4 x i64> %c, <4 x i64>* %x + ret void +} + +define void @umax_v32i8(<32 x i8>* %x, <32 x i8>* %y) { +; LMULMAX2-LABEL: umax_v32i8: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 32 +; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; LMULMAX2-NEXT: vle8.v v26, (a0) +; LMULMAX2-NEXT: vle8.v v28, (a1) +; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse8.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umax_v32i8: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 16 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle8.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle8.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umax_v32i8: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 16 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle8.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle8.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle8.v v27, (a2) +; 
LMULMAX1-RV64-NEXT: vle8.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse8.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse8.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <32 x i8>, <32 x i8>* %x + %b = load <32 x i8>, <32 x i8>* %y + %cc = icmp ugt <32 x i8> %a, %b + %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b + store <32 x i8> %c, <32 x i8>* %x + ret void +} + +define void @umax_v16i16(<16 x i16>* %x, <16 x i16>* %y) { +; LMULMAX2-LABEL: umax_v16i16: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 16 +; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu +; LMULMAX2-NEXT: vle16.v v26, (a0) +; LMULMAX2-NEXT: vle16.v v28, (a1) +; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse16.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umax_v16i16: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 8 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle16.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umax_v16i16: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 8 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle16.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle16.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle16.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle16.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse16.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse16.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <16 x i16>, <16 x i16>* %x + %b = load <16 x i16>, <16 x i16>* %y + %cc = icmp ugt <16 x i16> %a, %b + %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b + store <16 x i16> %c, <16 x i16>* %x + ret void +} + +define void @umax_v8i32(<8 x i32>* %x, <8 x i32>* %y) { +; LMULMAX2-LABEL: umax_v8i32: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 8 +; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu +; LMULMAX2-NEXT: vle32.v v26, (a0) +; LMULMAX2-NEXT: vle32.v v28, (a1) +; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse32.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umax_v8i32: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 4 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle32.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umax_v8i32: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 4 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle32.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle32.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: 
vle32.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle32.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse32.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse32.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <8 x i32>, <8 x i32>* %x + %b = load <8 x i32>, <8 x i32>* %y + %cc = icmp ugt <8 x i32> %a, %b + %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b + store <8 x i32> %c, <8 x i32>* %x + ret void +} + +define void @umax_v4i64(<4 x i64>* %x, <4 x i64>* %y) { +; LMULMAX2-LABEL: umax_v4i64: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 4 +; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu +; LMULMAX2-NEXT: vle64.v v26, (a0) +; LMULMAX2-NEXT: vle64.v v28, (a1) +; LMULMAX2-NEXT: vmaxu.vv v26, v26, v28 +; LMULMAX2-NEXT: vse64.v v26, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: umax_v4i64: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: addi a2, zero, 2 +; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV32-NEXT: addi a2, a0, 16 +; LMULMAX1-RV32-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV32-NEXT: addi a3, a1, 16 +; LMULMAX1-RV32-NEXT: vle64.v v27, (a3) +; LMULMAX1-RV32-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV32-NEXT: vmaxu.vv v26, v26, v27 +; LMULMAX1-RV32-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: umax_v4i64: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: addi a2, zero, 2 +; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; LMULMAX1-RV64-NEXT: vle64.v v25, (a0) +; LMULMAX1-RV64-NEXT: addi a2, a1, 16 +; LMULMAX1-RV64-NEXT: vle64.v v26, (a2) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v27, (a2) +; LMULMAX1-RV64-NEXT: vle64.v v28, (a1) +; LMULMAX1-RV64-NEXT: vmaxu.vv v26, v27, v26 +; LMULMAX1-RV64-NEXT: vmaxu.vv v25, v25, v28 +; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) +; LMULMAX1-RV64-NEXT: vse64.v v26, (a2) +; LMULMAX1-RV64-NEXT: ret + %a = load <4 x i64>, <4 x i64>* %x + %b = load <4 x i64>, <4 x i64>* %y + %cc = icmp ugt <4 x i64> %a, %b + %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b + store <4 x i64> %c, <4 x i64>* %x + ret void +}