diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3456,9 +3456,21 @@
 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                                  const SDLoc &DL, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
-  const MVT XLenVT = Subtarget.getXLenVT();
+  assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
+  const MVT XLenVT = Subtarget.getXLenVT();
   SDValue Passthru = DAG.getUNDEF(VT);
+
+  if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      isNullConstant(Scalar.getOperand(1))) {
+    MVT ExtractedVT = Scalar.getOperand(0).getSimpleValueType();
+    if (ExtractedVT.bitsLE(VT))
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
+                         Scalar.getOperand(0), DAG.getConstant(0, DL, XLenVT));
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Scalar.getOperand(0),
+                       DAG.getConstant(0, DL, XLenVT));
+  }
+
   if (VT.isFloatingPoint()) {
     // TODO: Use vmv.v.i for appropriate constants
     // Use M1 or smaller to avoid over constraining register allocation
@@ -7645,6 +7657,25 @@
                          DAG.getConstant(0, DL, XLenVT));
 }
+// Function to extract the first element of Vec. For fixed vector Vec, this
+// converts it to a scalable vector before extraction, so subsequent
+// optimizations don't have to handle fixed vectors.
+static SDValue getFirstElement(SDValue Vec, SelectionDAG &DAG,
+                               const RISCVSubtarget &Subtarget) {
+  SDLoc DL(Vec);
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VecVT = Vec.getSimpleValueType();
+  MVT VecEltVT = VecVT.getVectorElementType();
+
+  MVT ContainerVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
+                     DAG.getConstant(0, DL, XLenVT));
+}
+
 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -7679,9 +7710,17 @@
   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
-  SDValue NeutralElem =
-      DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
-  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec,
+  SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
+  switch (BaseOpc) {
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::UMAX:
+  case ISD::UMIN:
+  case ISD::SMAX:
+  case ISD::SMIN:
+    StartV = getFirstElement(Vec, DAG, Subtarget);
+  }
+  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
                            Mask, VL, DL, DAG, Subtarget);
 }
@@ -7689,11 +7728,11 @@
 // the vector SDValue and the scalar SDValue required to lower this to a
 // RISCVISD node.
 static std::tuple<unsigned, SDValue, SDValue>
-getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
+getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
+                               const RISCVSubtarget &Subtarget) {
   SDLoc DL(Op);
   auto Flags = Op->getFlags();
   unsigned Opcode = Op.getOpcode();
-  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
   switch (Opcode) {
   default:
     llvm_unreachable("Unhandled reduction");
@@ -7708,10 +7747,10 @@
                            Op.getOperand(0));
   case ISD::VECREDUCE_FMIN:
     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
-                           DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
+                           getFirstElement(Op.getOperand(0), DAG, Subtarget));
   case ISD::VECREDUCE_FMAX:
     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
-                           DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
+                           getFirstElement(Op.getOperand(0), DAG, Subtarget));
   }
 }
@@ -7723,7 +7762,7 @@
   unsigned RVVOpcode;
   SDValue VectorVal, ScalarVal;
   std::tie(RVVOpcode, VectorVal, ScalarVal) =
-      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
+      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
 
   MVT VecVT = VectorVal.getSimpleValueType();
   MVT ContainerVT = VecVT;
diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -46,9 +46,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v9
-; CHECK-NEXT: lui a0, 523264
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vfredmin.vs v8, v8, v9
+; CHECK-NEXT: vfredmin.vs v8, v8, v8
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
   %r1 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
@@ -62,9 +60,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9
-; CHECK-NEXT: lui a0, 1047552
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vfredmax.vs v8, v8, v9
+; CHECK-NEXT: vfredmax.vs v8, v8, v8
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
   %r1 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
@@ -114,12 +110,8 @@
 ; CHECK-NEXT: vmv.s.x v11, zero
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v10, v10, v11
-; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a0, v10
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -175,8 +167,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v9
-; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vredand.vs v8, v8, v9
+; CHECK-NEXT: vredand.vs v8, v8, v8
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
   %r1 = call i32 @llvm.vector.reduce.and.i32.v4i32(<4 x i32> %a)
@@ -190,8 +181,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vredor.vs v8, v8, v9
+; CHECK-NEXT: vredor.vs v8, v8, v8
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
   %r1 = call i32 @llvm.vector.reduce.or.i32.v4i32(<4 x i32> %a)
@@ -220,8 +210,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v9
-; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vredminu.vs v8, v8, v9
+; CHECK-NEXT: vredminu.vs v8,
v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.umin.i32.v4i32(<4 x i32> %a) @@ -235,8 +224,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.umax.i32.v4i32(<4 x i32> %a) @@ -246,27 +234,13 @@ } define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) { -; RV32-LABEL: smin_i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmin.vv v8, v8, v9 -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: smin_i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmin.vv v8, v8, v9 -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: smin_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmin.vv v8, v8, v9 +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.smin.i32.v4i32(<4 x i32> %a) %r2 = call i32 @llvm.vector.reduce.smin.i32.v4i32(<4 x i32> %b) %r = call i32 @llvm.smin.i32(i32 %r1, i32 %r2) @@ -278,9 +252,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.smax.i32.v4i32(<4 x i32> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) @@ -1135,27 +1135,13 @@ declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) define half @vreduce_fmin_v2f16(ptr %x) { -; RV32-LABEL: vreduce_fmin_v2f16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI68_0) -; RV32-NEXT: flh fa5, %lo(.LCPI68_0)(a1) -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v2f16: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI68_0) -; RV64-NEXT: flh fa5, %lo(.LCPI68_0)(a1) -; RV64-NEXT: vsetivli zero, 2, e16, 
mf4, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <2 x half>, ptr %x %red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v) ret half %red @@ -1164,81 +1150,39 @@ declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) define half @vreduce_fmin_v4f16(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI69_0) -; RV32-NEXT: flh fa5, %lo(.LCPI69_0)(a1) -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f16: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI69_0) -; RV64-NEXT: flh fa5, %lo(.LCPI69_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x half>, ptr %x %red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmin_v4f16_nonans(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f16_nonans: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI70_0) -; RV32-NEXT: flh fa5, %lo(.LCPI70_0)(a1) -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f16_nonans: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI70_0) -; RV64-NEXT: flh fa5, %lo(.LCPI70_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f16_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x half>, ptr %x %red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmin_v4f16_nonans_noinfs(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f16_nonans_noinfs: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI71_0) -; RV32-NEXT: flh fa5, %lo(.LCPI71_0)(a1) -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f16_nonans_noinfs: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI71_0) -; RV64-NEXT: flh fa5, %lo(.LCPI71_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; 
CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x half>, ptr %x %red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red @@ -1254,11 +1198,8 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI72_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI72_0)(a0) ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vfmv.s.f v16, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, ptr %x @@ -1273,9 +1214,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <2 x float>, ptr %x @@ -1290,9 +1229,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <4 x float>, ptr %x @@ -1305,9 +1242,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 522240 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <4 x float>, ptr %x @@ -1316,27 +1251,13 @@ } define float @vreduce_fmin_v4f32_nonans_noinfs(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f32_nonans_noinfs: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI76_0) -; RV32-NEXT: flw fa5, %lo(.LCPI76_0)(a1) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f32_nonans_noinfs: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI76_0) -; RV64-NEXT: flw fa5, %lo(.LCPI76_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x float>, ptr %x %red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red @@ -1359,9 +1280,7 @@ ; CHECK-NEXT: vfmin.vv v16, v24, v16 ; CHECK-NEXT: vfmin.vv v8, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x float>, ptr %x @@ -1372,27 +1291,13 @@ declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) define double @vreduce_fmin_v2f64(ptr %x) { -; RV32-LABEL: vreduce_fmin_v2f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI78_0) -; RV32-NEXT: fld fa5, %lo(.LCPI78_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v9 -; 
RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v2f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI78_0) -; RV64-NEXT: fld fa5, %lo(.LCPI78_0)(a1) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <2 x double>, ptr %x %red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v) ret double %red @@ -1401,81 +1306,39 @@ declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) define double @vreduce_fmin_v4f64(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI79_0) -; RV32-NEXT: fld fa5, %lo(.LCPI79_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v10, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v10 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI79_0) -; RV64-NEXT: fld fa5, %lo(.LCPI79_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v10, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v10 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x double>, ptr %x %red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmin_v4f64_nonans(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f64_nonans: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI80_0) -; RV32-NEXT: fld fa5, %lo(.LCPI80_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v10, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v10 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f64_nonans: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI80_0) -; RV64-NEXT: fld fa5, %lo(.LCPI80_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v10, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v10 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x double>, ptr %x %red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmin_v4f64_nonans_noinfs(ptr %x) { -; RV32-LABEL: vreduce_fmin_v4f64_nonans_noinfs: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI81_0) -; RV32-NEXT: fld fa5, %lo(.LCPI81_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v10, fa5 -; RV32-NEXT: vfredmin.vs v8, v8, v10 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v4f64_nonans_noinfs: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI81_0) -; RV64-NEXT: fld fa5, %lo(.LCPI81_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, 
(a0) -; RV64-NEXT: vfmv.s.f v10, fa5 -; RV64-NEXT: vfredmin.vs v8, v8, v10 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x double>, ptr %x %red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red @@ -1490,11 +1353,8 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI82_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI82_0)(a0) ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vfmv.s.f v16, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x @@ -1509,9 +1369,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <2 x half>, ptr %x @@ -1526,9 +1384,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <4 x half>, ptr %x @@ -1541,9 +1397,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a0, -1024 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <4 x half>, ptr %x @@ -1556,9 +1410,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a0, -1025 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <4 x half>, ptr %x @@ -1577,9 +1429,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, ptr %x @@ -1594,9 +1444,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <2 x float>, ptr %x @@ -1611,9 +1459,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <4 x float>, ptr %x @@ -1626,9 +1472,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 1046528 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = 
load <4 x float>, ptr %x @@ -1637,27 +1481,13 @@ } define float @vreduce_fmax_v4f32_nonans_noinfs(ptr %x) { -; RV32-LABEL: vreduce_fmax_v4f32_nonans_noinfs: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI91_0) -; RV32-NEXT: flw fa5, %lo(.LCPI91_0)(a1) -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmax.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmax_v4f32_nonans_noinfs: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI91_0) -; RV64-NEXT: flw fa5, %lo(.LCPI91_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmax.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vfredmax.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x float>, ptr %x %red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %red @@ -1680,9 +1510,7 @@ ; CHECK-NEXT: vfmax.vv v16, v24, v16 ; CHECK-NEXT: vfmax.vv v8, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x float>, ptr %x @@ -1693,27 +1521,13 @@ declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) define double @vreduce_fmax_v2f64(ptr %x) { -; RV32-LABEL: vreduce_fmax_v2f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI93_0) -; RV32-NEXT: fld fa5, %lo(.LCPI93_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v9, fa5 -; RV32-NEXT: vfredmax.vs v8, v8, v9 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmax_v2f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI93_0) -; RV64-NEXT: fld fa5, %lo(.LCPI93_0)(a1) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v9, fa5 -; RV64-NEXT: vfredmax.vs v8, v8, v9 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmax_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmax.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <2 x double>, ptr %x %red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v) ret double %red @@ -1722,81 +1536,39 @@ declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) define double @vreduce_fmax_v4f64(ptr %x) { -; RV32-LABEL: vreduce_fmax_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI94_0) -; RV32-NEXT: fld fa5, %lo(.LCPI94_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v10, fa5 -; RV32-NEXT: vfredmax.vs v8, v8, v10 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmax_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI94_0) -; RV64-NEXT: fld fa5, %lo(.LCPI94_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v10, fa5 -; RV64-NEXT: vfredmax.vs v8, v8, v10 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmax_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) 
+; CHECK-NEXT: vfredmax.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x double>, ptr %x %red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmax_v4f64_nonans(ptr %x) { -; RV32-LABEL: vreduce_fmax_v4f64_nonans: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI95_0) -; RV32-NEXT: fld fa5, %lo(.LCPI95_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v10, fa5 -; RV32-NEXT: vfredmax.vs v8, v8, v10 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmax_v4f64_nonans: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI95_0) -; RV64-NEXT: fld fa5, %lo(.LCPI95_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v10, fa5 -; RV64-NEXT: vfredmax.vs v8, v8, v10 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmax_v4f64_nonans: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmax.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x double>, ptr %x %red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmax_v4f64_nonans_noinfs(ptr %x) { -; RV32-LABEL: vreduce_fmax_v4f64_nonans_noinfs: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI96_0) -; RV32-NEXT: fld fa5, %lo(.LCPI96_0)(a1) -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.s.f v10, fa5 -; RV32-NEXT: vfredmax.vs v8, v8, v10 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmax_v4f64_nonans_noinfs: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI96_0) -; RV64-NEXT: fld fa5, %lo(.LCPI96_0)(a1) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.s.f v10, fa5 -; RV64-NEXT: vfredmax.vs v8, v8, v10 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vfredmax.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <4 x double>, ptr %x %red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red @@ -1811,11 +1583,8 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI97_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI97_0)(a0) ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vfmv.s.f v16, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -857,17 +857,14 @@ ; CHECK-NEXT: vmv.s.x v25, a0 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t -; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: addi a0, a1, -32 ; CHECK-NEXT: sltu a1, a1, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; 
CHECK-NEXT: vredxor.vs v8, v16, v8, v0.t -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t +; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl) ret i32 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -1769,8 +1769,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %x @@ -1785,8 +1784,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %x @@ -1801,8 +1799,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %x @@ -1817,8 +1814,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %x @@ -1834,10 +1830,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v10 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %x @@ -1853,10 +1846,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v12 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, ptr %x @@ -1872,10 +1862,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v16 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -1894,10 +1881,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v16 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -1926,8 +1910,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: 
vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %x @@ -1942,8 +1925,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %x @@ -1958,8 +1940,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %x @@ -1974,10 +1955,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v10 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %x @@ -1993,10 +1971,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v12 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i16>, ptr %x @@ -2012,10 +1987,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v16 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -2034,10 +2006,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v16 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -2066,8 +2035,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %x @@ -2082,8 +2050,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %x @@ -2098,10 +2065,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v10 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %x @@ -2116,10 +2080,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vredand.vs v8, 
v8, v12 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %x @@ -2135,10 +2096,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v16 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i32>, ptr %x @@ -2157,10 +2115,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v16 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -2199,8 +2154,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vredand.vs v8, v8, v9 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2212,8 +2166,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vredand.vs v8, v8, v9 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, ptr %x @@ -2228,10 +2181,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v10 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2243,10 +2193,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, -1 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v10 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, ptr %x @@ -2261,10 +2208,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v12 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2276,10 +2220,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v12, -1 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v12 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, ptr %x @@ -2294,10 +2235,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v16 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma 
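All of the vredand/vredor/vredmin/vredmax test updates in this file follow from one identity: for an idempotent reduction operator, seeding the accumulator with the vector's own first element gives the same result as seeding it with the operation's neutral element, so the separate vmv.v.i/vmv.s.x that used to materialize the start value can be dropped and the source register passed as both the vector and the start operand of the .vs reduction. A minimal standalone sketch of that equivalence, in plain C++ purely for illustration (reduceAnd and the sample values are made up here, not part of the patch):

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative only (not LLVM code): models a vector.reduce.and lowering.
// Seeding the accumulator with the all-ones neutral element or with the
// input's own first element gives the same answer, because x & x == x.
static uint32_t reduceAnd(const std::vector<uint32_t> &V, uint32_t Start) {
  uint32_t Acc = Start;
  for (uint32_t X : V)
    Acc &= X;
  return Acc;
}

int main() {
  std::vector<uint32_t> V = {0xf0f0f0f0u, 0xff00ff00u, 0x0ff00ff0u, 0xffffffffu};
  uint32_t WithNeutral = reduceAnd(V, UINT32_MAX); // old lowering: vmv.v.i v9, -1
  uint32_t WithFirst = reduceAnd(V, V[0]);         // new lowering: vredand.vs v8, v8, v8
  assert(WithNeutral == WithFirst);
  return 0;
}

The same argument is what lets the m2/m4/m8 cases below drop the extra vsetvli round trip that was only needed to splat the neutral element at m1.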
@@ -2309,10 +2247,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v16 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, ptr %x @@ -2330,10 +2265,7 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v16 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2348,10 +2280,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v16 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -2375,10 +2304,7 @@ ; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v16 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2399,10 +2325,7 @@ ; RV64-NEXT: vand.vv v16, v24, v16 ; RV64-NEXT: vand.vv v8, v8, v0 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v16 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i64>, ptr %x @@ -2431,8 +2354,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %x @@ -2447,8 +2369,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %x @@ -2463,8 +2384,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %x @@ -2479,8 +2399,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %x @@ -2496,8 +2415,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %x @@ -2513,8 +2431,7 @@ ; CHECK-NEXT: li a1, 64 
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vredor.vs v8, v8, v12 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, ptr %x @@ -2530,8 +2447,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -2550,8 +2466,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -2580,8 +2495,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %x @@ -2596,8 +2510,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %x @@ -2612,8 +2525,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %x @@ -2628,8 +2540,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %x @@ -2645,8 +2556,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vredor.vs v8, v8, v12 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i16>, ptr %x @@ -2662,8 +2572,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -2682,8 +2591,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -2712,8 +2620,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %x @@ -2728,8 +2635,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load 
<4 x i32>, ptr %x @@ -2744,8 +2650,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %x @@ -2760,8 +2665,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vredor.vs v8, v8, v12 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %x @@ -2777,8 +2681,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i32>, ptr %x @@ -2797,8 +2700,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -2837,8 +2739,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vredor.vs v8, v8, v9 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2850,8 +2751,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vredor.vs v8, v8, v9 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, ptr %x @@ -2866,8 +2766,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v10, zero -; RV32-NEXT: vredor.vs v8, v8, v10 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2879,8 +2778,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vredor.vs v8, v8, v10 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, ptr %x @@ -2895,8 +2793,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v12, zero -; RV32-NEXT: vredor.vs v8, v8, v12 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2908,8 +2805,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vredor.vs v8, v8, v12 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, ptr %x @@ -2924,8 +2820,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v16, zero -; RV32-NEXT: vredor.vs v8, v8, v16 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2937,8 +2832,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v16, zero -; RV64-NEXT: vredor.vs v8, v8, v16 
+; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, ptr %x @@ -2956,8 +2850,7 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vmv.s.x v16, zero -; RV32-NEXT: vredor.vs v8, v8, v16 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2972,8 +2865,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vmv.s.x v16, zero -; RV64-NEXT: vredor.vs v8, v8, v16 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -2997,8 +2889,7 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v8, v8, v0 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vmv.s.x v16, zero -; RV32-NEXT: vredor.vs v8, v8, v16 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -3019,8 +2910,7 @@ ; RV64-NEXT: vor.vv v16, v24, v16 ; RV64-NEXT: vor.vv v8, v8, v0 ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vmv.s.x v16, zero -; RV64-NEXT: vredor.vs v8, v8, v16 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i64>, ptr %x @@ -3667,9 +3557,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %x @@ -3684,9 +3572,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %x @@ -3701,9 +3587,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %x @@ -3718,9 +3602,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %x @@ -3736,9 +3618,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v10 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %x @@ -3754,9 +3634,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v12 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, ptr %x @@ -3772,9 +3650,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v16 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: 
vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -3793,9 +3669,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmin.vv v8, v8, v16 -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v16 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -3820,27 +3694,13 @@ declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) define i16 @vreduce_smin_v2i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <2 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %v) ret i16 %red @@ -3849,27 +3709,13 @@ declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) define i16 @vreduce_smin_v4i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <4 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) ret i16 %red @@ -3878,27 +3724,13 @@ declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) define i16 @vreduce_smin_v8i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v8i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v8i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <8 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) ret i16 %red @@ -3907,27 +3739,13 @@ declare 
i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) define i16 @vreduce_smin_v16i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v16i16: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vredmin.vs v8, v8, v10 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v16i16: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredmin.vs v8, v8, v10 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <16 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %v) ret i16 %red @@ -3936,29 +3754,14 @@ declare i16 @llvm.vector.reduce.smin.v32i16(<32 x i16>) define i16 @vreduce_smin_v32i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v32i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v12, a0 -; RV32-NEXT: vredmin.vs v8, v8, v12 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v32i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vredmin.vs v8, v8, v12 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <32 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> %v) ret i16 %red @@ -3967,29 +3770,14 @@ declare i16 @llvm.vector.reduce.smin.v64i16(<64 x i16>) define i16 @vreduce_smin_v64i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v64i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 64 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vredmin.vs v8, v8, v16 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v64i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 64 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v64i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> %v) ret i16 %red @@ -3998,35 +3786,17 @@ declare i16 @llvm.vector.reduce.smin.v128i16(<128 x i16>) define i16 @vreduce_smin_v128i16(ptr %x) { -; RV32-LABEL: vreduce_smin_v128i16: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 64 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV32-NEXT: 
vle16.v v8, (a0) -; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle16.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vredmin.vs v8, v8, v16 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v128i16: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 64 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle16.v v16, (a0) -; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x %red = call i16 @llvm.vector.reduce.smin.v128i16(<128 x i16> %v) ret i16 %red @@ -4049,27 +3819,13 @@ declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) define i32 @vreduce_smin_v2i32(ptr %x) { -; RV32-LABEL: vreduce_smin_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <2 x i32>, ptr %x %red = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %v) ret i32 %red @@ -4078,27 +3834,13 @@ declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) define i32 @vreduce_smin_v4i32(ptr %x) { -; RV32-LABEL: vreduce_smin_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <4 x i32>, ptr %x %red = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) ret i32 %red @@ -4107,27 +3849,13 @@ declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) define i32 @vreduce_smin_v8i32(ptr %x) { -; RV32-LABEL: vreduce_smin_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, 524288 
-; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vredmin.vs v8, v8, v10 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredmin.vs v8, v8, v10 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <8 x i32>, ptr %x %red = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v) ret i32 %red @@ -4136,27 +3864,13 @@ declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>) define i32 @vreduce_smin_v16i32(ptr %x) { -; RV32-LABEL: vreduce_smin_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v12, a0 -; RV32-NEXT: vredmin.vs v8, v8, v12 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vredmin.vs v8, v8, v12 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <16 x i32>, ptr %x %red = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %v) ret i32 %red @@ -4165,29 +3879,14 @@ declare i32 @llvm.vector.reduce.smin.v32i32(<32 x i32>) define i32 @vreduce_smin_v32i32(ptr %x) { -; RV32-LABEL: vreduce_smin_v32i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vredmin.vs v8, v8, v16 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_v32i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <32 x i32>, ptr %x %red = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> %v) ret i32 %red @@ -4196,35 +3895,17 @@ declare i32 @llvm.vector.reduce.smin.v64i32(<64 x i32>) define i32 @vreduce_smin_v64i32(ptr %x) { -; RV32-LABEL: vreduce_smin_v64i32: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle32.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vredmin.vs v8, v8, v16 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: 
vreduce_smin_v64i32: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle32.v v16, (a0) -; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x %red = call i32 @llvm.vector.reduce.smin.v64i32(<64 x i32> %v) ret i32 %red @@ -4259,36 +3940,21 @@ define i64 @vreduce_smin_v2i64(ptr %x) { ; RV32-LABEL: vreduce_smin_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v9 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, ptr %x @@ -4301,36 +3967,21 @@ define i64 @vreduce_smin_v4i64(ptr %x) { ; RV32-LABEL: vreduce_smin_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v10 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredmin.vs v8, v8, v10 +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, ptr %x @@ -4343,36 +3994,21 @@ define i64 @vreduce_smin_v8i64(ptr %x) { ; RV32-LABEL: vreduce_smin_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, -1 -; 
RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v12 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vredmin.vs v8, v8, v12 +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, ptr %x @@ -4385,36 +4021,21 @@ define i64 @vreduce_smin_v16i64(ptr %x) { ; RV32-LABEL: vreduce_smin_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v16i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, ptr %x @@ -4427,29 +4048,17 @@ define i64 @vreduce_smin_v32i64(ptr %x) { ; RV32-LABEL: vreduce_smin_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v32i64: @@ -4459,10 +4068,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -4475,34 +4081,23 @@ define i64 @vreduce_smin_v64i64(ptr %x) nounwind { ; RV32-LABEL: vreduce_smin_v64i64: ; 
RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: vmin.vv v24, v0, v24 +; RV32-NEXT: vle64.v v24, (a0) +; RV32-NEXT: vle64.v v0, (a1) +; RV32-NEXT: vmin.vv v16, v24, v16 +; RV32-NEXT: vmin.vv v8, v8, v0 ; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: vmin.vv v8, v8, v24 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v64i64: @@ -4518,10 +4113,7 @@ ; RV64-NEXT: vmin.vv v16, v24, v16 ; RV64-NEXT: vmin.vv v8, v8, v0 ; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i64>, ptr %x @@ -4550,9 +4142,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %x @@ -4567,9 +4157,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %x @@ -4584,9 +4172,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %x @@ -4601,9 +4187,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %x @@ -4619,9 +4203,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %x @@ -4637,9 +4219,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v12 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, ptr %x @@ -4655,9 +4235,7 @@ 
; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -4676,9 +4254,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmax.vv v8, v8, v16 -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -4707,9 +4283,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %x @@ -4724,9 +4298,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %x @@ -4741,9 +4313,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %x @@ -4758,9 +4328,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %x @@ -4776,9 +4344,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v12 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i16>, ptr %x @@ -4794,9 +4360,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -4815,9 +4379,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -4846,9 +4408,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %x @@ -4863,9 +4423,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; 
CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %x @@ -4880,9 +4438,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %x @@ -4897,9 +4453,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v12 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %x @@ -4915,9 +4469,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i32>, ptr %x @@ -4936,9 +4488,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -4975,34 +4525,21 @@ define i64 @vreduce_smax_v2i64(ptr %x) { ; RV32-LABEL: vreduce_smax_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v9 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmax.vs v8, v8, v9 +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, ptr %x @@ -5015,34 +4552,21 @@ define i64 @vreduce_smax_v4i64(ptr %x) { ; RV32-LABEL: vreduce_smax_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v10 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v10, a0 -; 
RV64-NEXT: vredmax.vs v8, v8, v10 +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, ptr %x @@ -5055,34 +4579,21 @@ define i64 @vreduce_smax_v8i64(ptr %x) { ; RV32-LABEL: vreduce_smax_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v12 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vredmax.vs v8, v8, v12 +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, ptr %x @@ -5095,34 +4606,21 @@ define i64 @vreduce_smax_v16i64(ptr %x) { ; RV32-LABEL: vreduce_smax_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v16 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v16i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmax.vs v8, v8, v16 +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, ptr %x @@ -5135,27 +4633,17 @@ define i64 @vreduce_smax_v32i64(ptr %x) { ; RV32-LABEL: vreduce_smax_v32i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: vmax.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v16 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v32i64: @@ -5165,10 +4653,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmax.vv v8, v8, v16 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; 
RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmax.vs v8, v8, v16 +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -5181,32 +4666,23 @@ define i64 @vreduce_smax_v64i64(ptr %x) nounwind { ; RV32-LABEL: vreduce_smax_v64i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: vmax.vv v24, v0, v24 +; RV32-NEXT: vle64.v v24, (a0) +; RV32-NEXT: vle64.v v0, (a1) +; RV32-NEXT: vmax.vv v16, v24, v16 +; RV32-NEXT: vmax.vv v8, v8, v0 ; RV32-NEXT: vmax.vv v8, v8, v16 -; RV32-NEXT: vmax.vv v8, v8, v24 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v16 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v64i64: @@ -5222,10 +4698,7 @@ ; RV64-NEXT: vmax.vv v16, v24, v16 ; RV64-NEXT: vmax.vv v8, v8, v0 ; RV64-NEXT: vmax.vv v8, v8, v16 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vredmax.vs v8, v8, v16 +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i64>, ptr %x @@ -5254,8 +4727,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %x @@ -5270,8 +4742,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %x @@ -5286,8 +4757,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %x @@ -5302,8 +4772,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %x @@ -5319,10 +4788,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v10 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %x @@ -5338,10 +4804,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: 
vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v12 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, ptr %x @@ -5357,10 +4820,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v16 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -5379,10 +4839,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v16 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -5411,8 +4868,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %x @@ -5427,8 +4883,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %x @@ -5443,8 +4898,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %x @@ -5459,10 +4913,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v10 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %x @@ -5478,10 +4929,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v12 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i16>, ptr %x @@ -5497,10 +4945,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v16 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -5519,10 +4964,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v16 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; 
CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -5551,8 +4993,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %x @@ -5567,8 +5008,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %x @@ -5583,10 +5023,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v10 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %x @@ -5601,10 +5038,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v12 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %x @@ -5620,10 +5054,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v16 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i32>, ptr %x @@ -5642,10 +5073,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v16 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -5684,8 +5112,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vredminu.vs v8, v8, v9 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5697,8 +5124,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vredminu.vs v8, v8, v9 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, ptr %x @@ -5713,10 +5139,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v10 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5728,10 +5151,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, -1 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: 
vredminu.vs v8, v8, v10 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, ptr %x @@ -5746,10 +5166,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v12 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5761,10 +5178,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v12, -1 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v12 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, ptr %x @@ -5779,10 +5193,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v16 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5794,10 +5205,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v16 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, ptr %x @@ -5815,10 +5223,7 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vminu.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v16 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5833,10 +5238,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v16 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -5860,10 +5262,7 @@ ; RV32-NEXT: vminu.vv v16, v24, v16 ; RV32-NEXT: vminu.vv v8, v8, v0 ; RV32-NEXT: vminu.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v16 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5884,10 +5283,7 @@ ; RV64-NEXT: vminu.vv v16, v24, v16 ; RV64-NEXT: vminu.vv v8, v8, v0 ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v16 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i64>, ptr %x @@ -5916,8 +5312,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; 
CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %x @@ -5932,8 +5327,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %x @@ -5948,8 +5342,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %x @@ -5964,8 +5357,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %x @@ -5981,8 +5373,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %x @@ -5998,8 +5389,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v12 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i8>, ptr %x @@ -6015,8 +5405,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -6035,8 +5424,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -6065,8 +5453,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %x @@ -6081,8 +5468,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %x @@ -6097,8 +5483,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %x @@ -6113,8 +5498,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %x @@ -6130,8 +5514,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: 
vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v12 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i16>, ptr %x @@ -6147,8 +5530,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -6167,8 +5549,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -6197,8 +5578,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %x @@ -6213,8 +5593,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %x @@ -6229,8 +5608,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %x @@ -6245,8 +5623,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v12 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %x @@ -6262,8 +5639,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <32 x i32>, ptr %x @@ -6282,8 +5658,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -6322,8 +5697,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vredmaxu.vs v8, v8, v9 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6335,8 +5709,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v9 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <2 x i64>, ptr %x @@ -6351,8 +5724,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v10, zero -; RV32-NEXT: vredmaxu.vs v8, v8, v10 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: 
vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6364,8 +5736,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v10 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <4 x i64>, ptr %x @@ -6380,8 +5751,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v12, zero -; RV32-NEXT: vredmaxu.vs v8, v8, v12 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6393,8 +5763,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v12 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <8 x i64>, ptr %x @@ -6409,8 +5778,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v16, zero -; RV32-NEXT: vredmaxu.vs v8, v8, v16 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6422,8 +5790,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v16, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v16 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i64>, ptr %x @@ -6441,8 +5808,7 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vmaxu.vv v8, v8, v16 -; RV32-NEXT: vmv.s.x v16, zero -; RV32-NEXT: vredmaxu.vs v8, v8, v16 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6457,8 +5823,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmaxu.vv v8, v8, v16 -; RV64-NEXT: vmv.s.x v16, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v16 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -6482,8 +5847,7 @@ ; RV32-NEXT: vmaxu.vv v16, v24, v16 ; RV32-NEXT: vmaxu.vv v8, v8, v0 ; RV32-NEXT: vmaxu.vv v8, v8, v16 -; RV32-NEXT: vmv.s.x v16, zero -; RV32-NEXT: vredmaxu.vs v8, v8, v16 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6504,8 +5868,7 @@ ; RV64-NEXT: vmaxu.vv v16, v24, v16 ; RV64-NEXT: vmaxu.vv v8, v8, v0 ; RV64-NEXT: vmaxu.vv v8, v8, v16 -; RV64-NEXT: vmv.s.x v16, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v16 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i64>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll --- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -34,9 +34,9 @@ ; CHECK-LABEL: reduce_and: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredand.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredand.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v) @@ -47,11 +47,10 @@ define i64 @reduce_and2(<4 x 
i64> %v) { ; CHECK-LABEL: reduce_and2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v10 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: andi a0, a0, 8 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v) @@ -63,9 +62,9 @@ ; CHECK-LABEL: reduce_or: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredor.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredor.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v) @@ -76,11 +75,10 @@ define i64 @reduce_or2(<4 x i64> %v) { ; CHECK-LABEL: reduce_or2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ori a0, a0, 8 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v) @@ -121,9 +119,9 @@ ; CHECK-LABEL: reduce_umax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: maxu a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v) @@ -134,11 +132,11 @@ define i64 @reduce_umax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: maxu a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v) @@ -150,9 +148,9 @@ ; CHECK-LABEL: reduce_umin: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredminu.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredminu.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: minu a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v) @@ -163,11 +161,11 @@ define i64 @reduce_umin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v10 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: minu a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v) @@ -179,9 +177,9 @@ ; CHECK-LABEL: reduce_smax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredmax.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: max a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v) @@ -192,11 +190,11 @@ define i64 @reduce_smax2(<4 x i64> %v) { ; CHECK-LABEL: 
reduce_smax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: max a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v) @@ -208,9 +206,9 @@ ; CHECK-LABEL: reduce_smin: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: min a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v) @@ -221,11 +219,11 @@ define i64 @reduce_smin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vredmin.vs v8, v8, v10 +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: min a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v) @@ -303,9 +301,9 @@ ; CHECK-LABEL: reduce_fmax: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fmax.s fa0, fa0, fa5 ; CHECK-NEXT: ret entry: %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) @@ -317,9 +315,9 @@ ; CHECK-LABEL: reduce_fmin: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fmin.s fa0, fa0, fa5 ; CHECK-NEXT: ret entry: %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -423,11 +423,8 @@ define half @vreduce_fmin_nxv1f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -437,11 +434,8 @@ define half @vreduce_fmin_nxv1f16_nonans( %v) #0 { ; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI31_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -451,11 +445,8 @@ define half @vreduce_fmin_nxv1f16_nonans_noinfs( %v) #1 { ; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK-NEXT: flh fa5, 
%lo(.LCPI32_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -467,11 +458,8 @@ define half @vreduce_fmin_nxv2f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI33_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI33_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) @@ -483,11 +471,8 @@ define half @vreduce_fmin_nxv4f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI34_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv4f16( %v) @@ -499,12 +484,9 @@ define half @vreduce_fmin_nxv64f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI35_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vfmv.s.f v16, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) @@ -516,10 +498,8 @@ define float @vreduce_fmin_nxv1f32( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -529,10 +509,8 @@ define float @vreduce_fmin_nxv1f32_nonans( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 522240 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -542,11 +520,8 @@ define float @vreduce_fmin_nxv1f32_nonans_noinfs( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: flw fa5, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -558,10 +533,8 @@ define float @vreduce_fmin_nxv2f32( %v) { ; CHECK-LABEL: vreduce_fmin_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call 
float @llvm.vector.reduce.fmin.nxv2f32( %v) @@ -573,10 +546,8 @@ define float @vreduce_fmin_nxv4f32( %v) { ; CHECK-LABEL: vreduce_fmin_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) @@ -590,9 +561,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, 523264 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) @@ -604,11 +573,8 @@ define double @vreduce_fmin_nxv1f64( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv1f64( %v) @@ -618,11 +584,8 @@ define double @vreduce_fmin_nxv1f64_nonans( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64( %v) @@ -632,11 +595,8 @@ define double @vreduce_fmin_nxv1f64_nonans_noinfs( %v) { ; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI44_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v9 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64( %v) @@ -648,11 +608,8 @@ define double @vreduce_fmin_nxv2f64( %v) { ; CHECK-LABEL: vreduce_fmin_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI45_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v10 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) @@ -664,11 +621,8 @@ define double @vreduce_fmin_nxv4f64( %v) { ; CHECK-LABEL: vreduce_fmin_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI46_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v12 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) @@ -680,12 +634,9 @@ define double @vreduce_fmin_nxv16f64( %v) { ; CHECK-LABEL: vreduce_fmin_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI47_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI47_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vfmv.s.f v16, 
fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) @@ -697,10 +648,8 @@ define half @vreduce_fmax_nxv1f16( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -710,10 +659,8 @@ define half @vreduce_fmax_nxv1f16_nonans( %v) #0 { ; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -1024 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -723,10 +670,8 @@ define half @vreduce_fmax_nxv1f16_nonans_noinfs( %v) #1 { ; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -1025 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -738,10 +683,8 @@ define half @vreduce_fmax_nxv2f16( %v) { ; CHECK-LABEL: vreduce_fmax_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) @@ -753,10 +696,8 @@ define half @vreduce_fmax_nxv4f16( %v) { ; CHECK-LABEL: vreduce_fmax_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv4f16( %v) @@ -770,9 +711,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) @@ -784,10 +723,8 @@ define float @vreduce_fmax_nxv1f32( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -797,10 +734,8 @@ define float @vreduce_fmax_nxv1f32_nonans( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1046528 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma 
-; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -810,11 +745,8 @@ define float @vreduce_fmax_nxv1f32_nonans_noinfs( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI56_0) -; CHECK-NEXT: flw fa5, %lo(.LCPI56_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -826,10 +758,8 @@ define float @vreduce_fmax_nxv2f32( %v) { ; CHECK-LABEL: vreduce_fmax_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv2f32( %v) @@ -841,10 +771,8 @@ define float @vreduce_fmax_nxv4f32( %v) { ; CHECK-LABEL: vreduce_fmax_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) @@ -858,9 +786,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, 1047552 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) @@ -872,11 +798,8 @@ define double @vreduce_fmax_nxv1f64( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI60_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI60_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv1f64( %v) @@ -886,11 +809,8 @@ define double @vreduce_fmax_nxv1f64_nonans( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI61_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI61_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64( %v) @@ -900,11 +820,8 @@ define double @vreduce_fmax_nxv1f64_nonans_noinfs( %v) { ; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI62_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI62_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v9 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64( %v) @@ -916,11 +833,8 @@ define double @vreduce_fmax_nxv2f64( %v) 
{ ; CHECK-LABEL: vreduce_fmax_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI63_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI63_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v10 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) @@ -932,11 +846,8 @@ define double @vreduce_fmax_nxv4f64( %v) { ; CHECK-LABEL: vreduce_fmax_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI64_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI64_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v12 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) @@ -948,12 +859,9 @@ define double @vreduce_fmax_nxv16f64( %v) { ; CHECK-LABEL: vreduce_fmax_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI65_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI65_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vfmv.s.f v16, fa5 -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) @@ -1110,9 +1018,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI73_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI73_0)(a0) -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v12, fa5 +; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vlse16.v v12, (a0), zero ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma @@ -1122,8 +1030,7 @@ ; CHECK-NEXT: vslideup.vx v11, v12, a0 ; CHECK-NEXT: vslideup.vx v10, v12, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa5 -; CHECK-NEXT: vfredmin.vs v8, v8, v12 +; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv10f16( %v) @@ -1137,10 +1044,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -512 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v12 +; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv12f16( %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1153,60 +1153,56 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; RV32-LABEL: vpreduce_umax_nxv32i32: ; RV32: # %bb.0: -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: srli a2, a3, 2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a3, a2, 2 ; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a2 -; RV32-NEXT: slli a3, a3, 1 -; RV32-NEXT: sub a2, a1, a3 -; RV32-NEXT: sltu a4, a1, a2 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: and a2, a4, a2 +; RV32-NEXT: vslidedown.vx v24, v0, a3 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: slli a2, a2, 1 ; RV32-NEXT: vmv.s.x v25, a0 -; RV32-NEXT: bltu a1, a3, .LBB67_2 +; RV32-NEXT: 
mv a0, a1 +; RV32-NEXT: bltu a1, a2, .LBB67_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: .LBB67_2: -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV32-NEXT: vmv.x.s a0, v25 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: sub a0, a1, a2 +; RV32-NEXT: sltu a1, a1, a0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: vredmaxu.vs v8, v16, v8, v0.t -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vredmaxu.vs v25, v16, v25, v0.t +; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_nxv32i32: ; RV64: # %bb.0: -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: srli a2, a3, 2 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a3, a2, 2 ; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a2 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a2, a0, 32 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: sub a0, a1, a3 -; RV64-NEXT: sltu a4, a1, a0 -; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a0, a4, a0 -; RV64-NEXT: bltu a1, a3, .LBB67_2 +; RV64-NEXT: vslidedown.vx v24, v0, a3 +; RV64-NEXT: slli a3, a0, 32 +; RV64-NEXT: slli a0, a2, 1 +; RV64-NEXT: srli a3, a3, 32 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: bltu a1, a0, .LBB67_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a3 +; RV64-NEXT: mv a2, a0 ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v25, a2 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmv.s.x v25, a3 +; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV64-NEXT: vmv.x.s a1, v25 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sltu a1, a1, a0 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vredmaxu.vs v8, v16, v8, v0.t -; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: vredmaxu.vs v25, v16, v25, v0.t +; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, %v, %m, i32 %evl) ret i32 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -24,8 +24,7 @@ ; CHECK-LABEL: vreduce_umax_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) @@ -37,10 +36,8 @@ define signext i8 @vreduce_smax_nxv1i8( %v) { ; CHECK-LABEL: vreduce_smax_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) @@ -53,8 +50,7 @@ ; CHECK-LABEL: vreduce_umin_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; 
CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) @@ -66,10 +62,8 @@ define signext i8 @vreduce_smin_nxv1i8( %v) { ; CHECK-LABEL: vreduce_smin_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) @@ -82,8 +76,7 @@ ; CHECK-LABEL: vreduce_and_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) @@ -96,8 +89,7 @@ ; CHECK-LABEL: vreduce_or_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) @@ -138,8 +130,7 @@ ; CHECK-LABEL: vreduce_umax_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) @@ -151,10 +142,8 @@ define signext i8 @vreduce_smax_nxv2i8( %v) { ; CHECK-LABEL: vreduce_smax_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) @@ -167,8 +156,7 @@ ; CHECK-LABEL: vreduce_umin_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) @@ -180,10 +168,8 @@ define signext i8 @vreduce_smin_nxv2i8( %v) { ; CHECK-LABEL: vreduce_smin_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) @@ -196,8 +182,7 @@ ; CHECK-LABEL: vreduce_and_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) @@ -210,8 +195,7 @@ ; CHECK-LABEL: vreduce_or_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) @@ -252,8 +236,7 @@ ; CHECK-LABEL: vreduce_umax_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: 
vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) @@ -265,10 +248,8 @@ define signext i8 @vreduce_smax_nxv4i8( %v) { ; CHECK-LABEL: vreduce_smax_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) @@ -281,8 +262,7 @@ ; CHECK-LABEL: vreduce_umin_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) @@ -294,10 +274,8 @@ define signext i8 @vreduce_smin_nxv4i8( %v) { ; CHECK-LABEL: vreduce_smin_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmin.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) @@ -310,8 +288,7 @@ ; CHECK-LABEL: vreduce_and_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) @@ -324,8 +301,7 @@ ; CHECK-LABEL: vreduce_or_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) @@ -396,8 +372,7 @@ ; CHECK-LABEL: vreduce_umax_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) @@ -409,10 +384,8 @@ define signext i16 @vreduce_smax_nxv1i16( %v) { ; CHECK-LABEL: vreduce_smax_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) @@ -425,8 +398,7 @@ ; CHECK-LABEL: vreduce_umin_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) @@ -436,25 +408,12 @@ declare i16 @llvm.vector.reduce.smin.nxv1i16() define signext i16 @vreduce_smin_nxv1i16( %v) { -; RV32-LABEL: vreduce_smin_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 
-; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) ret i16 %red } @@ -465,8 +424,7 @@ ; CHECK-LABEL: vreduce_and_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) @@ -479,8 +437,7 @@ ; CHECK-LABEL: vreduce_or_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) @@ -551,8 +508,7 @@ ; CHECK-LABEL: vreduce_umax_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) @@ -564,10 +520,8 @@ define signext i16 @vreduce_smax_nxv2i16( %v) { ; CHECK-LABEL: vreduce_smax_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) @@ -580,8 +534,7 @@ ; CHECK-LABEL: vreduce_umin_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) @@ -591,25 +544,12 @@ declare i16 @llvm.vector.reduce.smin.nxv2i16() define signext i16 @vreduce_smin_nxv2i16( %v) { -; RV32-LABEL: vreduce_smin_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) ret i16 %red } @@ -620,8 +560,7 @@ ; CHECK-LABEL: vreduce_and_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 
@llvm.vector.reduce.and.nxv2i16( %v) @@ -634,8 +573,7 @@ ; CHECK-LABEL: vreduce_or_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) @@ -706,8 +644,7 @@ ; CHECK-LABEL: vreduce_umax_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umax.nxv4i16( %v) @@ -719,10 +656,8 @@ define signext i16 @vreduce_smax_nxv4i16( %v) { ; CHECK-LABEL: vreduce_smax_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv4i16( %v) @@ -735,8 +670,7 @@ ; CHECK-LABEL: vreduce_umin_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv4i16( %v) @@ -746,25 +680,12 @@ declare i16 @llvm.vector.reduce.smin.nxv4i16() define signext i16 @vreduce_smin_nxv4i16( %v) { -; RV32-LABEL: vreduce_smin_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 8 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 8 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv4i16( %v) ret i16 %red } @@ -775,8 +696,7 @@ ; CHECK-LABEL: vreduce_and_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.and.nxv4i16( %v) @@ -789,8 +709,7 @@ ; CHECK-LABEL: vreduce_or_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv4i16( %v) @@ -861,8 +780,7 @@ ; CHECK-LABEL: vreduce_umax_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) @@ -874,10 +792,8 @@ define signext i32 @vreduce_smax_nxv1i32( %v) { ; CHECK-LABEL: vreduce_smax_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: 
vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) @@ -890,8 +806,7 @@ ; CHECK-LABEL: vreduce_umin_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) @@ -901,25 +816,12 @@ declare i32 @llvm.vector.reduce.smin.nxv1i32() define signext i32 @vreduce_smin_nxv1i32( %v) { -; RV32-LABEL: vreduce_smin_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) ret i32 %red } @@ -930,8 +832,7 @@ ; CHECK-LABEL: vreduce_and_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) @@ -944,8 +845,7 @@ ; CHECK-LABEL: vreduce_or_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) @@ -1016,8 +916,7 @@ ; CHECK-LABEL: vreduce_umax_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v9 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv2i32( %v) @@ -1029,10 +928,8 @@ define signext i32 @vreduce_smax_nxv2i32( %v) { ; CHECK-LABEL: vreduce_smax_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv2i32( %v) @@ -1045,8 +942,7 @@ ; CHECK-LABEL: vreduce_umin_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredminu.vs v8, v8, v9 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv2i32( %v) @@ -1056,25 +952,12 @@ declare i32 @llvm.vector.reduce.smin.nxv2i32() define signext i32 @vreduce_smin_nxv2i32( %v) { -; RV32-LABEL: vreduce_smin_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, 
a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vredmin.vs v8, v8, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv2i32( %v) ret i32 %red } @@ -1085,8 +968,7 @@ ; CHECK-LABEL: vreduce_and_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vredand.vs v8, v8, v9 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.and.nxv2i32( %v) @@ -1099,8 +981,7 @@ ; CHECK-LABEL: vreduce_or_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredor.vs v8, v8, v9 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.or.nxv2i32( %v) @@ -1171,8 +1052,7 @@ ; CHECK-LABEL: vreduce_umax_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vredmaxu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) @@ -1184,10 +1064,8 @@ define signext i32 @vreduce_smax_nxv4i32( %v) { ; CHECK-LABEL: vreduce_smax_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) @@ -1199,10 +1077,8 @@ define signext i32 @vreduce_umin_nxv4i32( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vredminu.vs v8, v8, v10 +; CHECK-NEXT: vredminu.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) @@ -1212,25 +1088,12 @@ declare i32 @llvm.vector.reduce.smin.nxv4i32() define signext i32 @vreduce_smin_nxv4i32( %v) { -; RV32-LABEL: vreduce_smin_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vredmin.vs v8, v8, v10 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_smin_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredmin.vs v8, v8, v10 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_smin_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) ret i32 %red } @@ -1240,10 +1103,8 @@ define 
signext i32 @vreduce_and_nxv4i32( %v) { ; CHECK-LABEL: vreduce_and_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vredand.vs v8, v8, v10 +; CHECK-NEXT: vredand.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) @@ -1256,8 +1117,7 @@ ; CHECK-LABEL: vreduce_or_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vredor.vs v8, v8, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) @@ -1368,10 +1228,8 @@ define i64 @vreduce_umax_nxv1i64( %v) { ; RV32-LABEL: vreduce_umax_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vredmaxu.vs v8, v8, v9 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1382,8 +1240,7 @@ ; RV64-LABEL: vreduce_umax_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v9 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv1i64( %v) @@ -1395,31 +1252,19 @@ define i64 @vreduce_smax_nxv1i64( %v) { ; RV32-LABEL: vreduce_smax_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v9 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmax.vs v8, v8, v9 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv1i64( %v) @@ -1431,10 +1276,8 @@ define i64 @vreduce_umin_nxv1i64( %v) { ; RV32-LABEL: vreduce_umin_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v9 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1445,8 +1288,7 @@ ; RV64-LABEL: vreduce_umin_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vredminu.vs v8, v8, v9 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv1i64( %v) @@ -1458,33 +1300,19 @@ define i64 @vreduce_smin_nxv1i64( %v) { ; RV32-LABEL: vreduce_smin_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; 
RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a0), zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v9 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vredmin.vs v8, v8, v9 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv1i64( %v) @@ -1496,10 +1324,8 @@ define i64 @vreduce_and_nxv1i64( %v) { ; RV32-LABEL: vreduce_and_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v9 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1510,8 +1336,7 @@ ; RV64-LABEL: vreduce_and_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vredand.vs v8, v8, v9 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv1i64( %v) @@ -1523,10 +1348,8 @@ define i64 @vreduce_or_nxv1i64( %v) { ; RV32-LABEL: vreduce_or_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vredor.vs v8, v8, v9 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1537,8 +1360,7 @@ ; RV64-LABEL: vreduce_or_nxv1i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vredor.vs v8, v8, v9 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv1i64( %v) @@ -1660,10 +1482,8 @@ define i64 @vreduce_umax_nxv2i64( %v) { ; RV32-LABEL: vreduce_umax_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, zero ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vredmaxu.vs v8, v8, v10 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1674,8 +1494,7 @@ ; RV64-LABEL: vreduce_umax_nxv2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v10 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) @@ -1687,31 +1506,19 @@ define i64 @vreduce_smax_nxv2i64( %v) { ; RV32-LABEL: vreduce_smax_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, 
ma -; RV32-NEXT: vredmax.vs v8, v8, v10 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredmax.vs v8, v8, v10 +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) @@ -1723,10 +1530,8 @@ define i64 @vreduce_umin_nxv2i64( %v) { ; RV32-LABEL: vreduce_umin_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v10 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1736,10 +1541,8 @@ ; ; RV64-LABEL: vreduce_umin_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v10 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) @@ -1751,33 +1554,19 @@ define i64 @vreduce_smin_nxv2i64( %v) { ; RV32-LABEL: vreduce_smin_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v10 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredmin.vs v8, v8, v10 +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) @@ -1789,10 +1578,8 @@ define i64 @vreduce_and_nxv2i64( %v) { ; RV32-LABEL: vreduce_and_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v10 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1802,10 +1589,8 @@ ; ; RV64-LABEL: vreduce_and_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v10 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) @@ -1817,10 +1602,8 @@ define i64 @vreduce_or_nxv2i64( %v) { ; RV32-LABEL: vreduce_or_nxv2i64: ; RV32: # %bb.0: -; 
RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v10, zero ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vredor.vs v8, v8, v10 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1831,8 +1614,7 @@ ; RV64-LABEL: vreduce_or_nxv2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vredor.vs v8, v8, v10 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) @@ -1954,10 +1736,8 @@ define i64 @vreduce_umax_nxv4i64( %v) { ; RV32-LABEL: vreduce_umax_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v12, zero ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vredmaxu.vs v8, v8, v12 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1968,8 +1748,7 @@ ; RV64-LABEL: vreduce_umax_nxv4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vredmaxu.vs v8, v8, v12 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) @@ -1981,31 +1760,19 @@ define i64 @vreduce_smax_nxv4i64( %v) { ; RV32-LABEL: vreduce_smax_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v12 +; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vredmax.vs v8, v8, v12 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) @@ -2017,10 +1784,8 @@ define i64 @vreduce_umin_nxv4i64( %v) { ; RV32-LABEL: vreduce_umin_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vredminu.vs v8, v8, v12 +; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2030,10 +1795,8 @@ ; ; RV64-LABEL: vreduce_umin_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vredminu.vs v8, v8, v12 +; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) @@ -2045,33 +1808,19 @@ define i64 @vreduce_smin_nxv4i64( %v) { ; RV32-LABEL: vreduce_smin_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: li a0, -1 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a0, 
a0, -1 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v12 +; RV32-NEXT: vredmin.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, -1 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vredmin.vs v8, v8, v12 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) @@ -2083,10 +1832,8 @@ define i64 @vreduce_and_nxv4i64( %v) { ; RV32-LABEL: vreduce_and_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vredand.vs v8, v8, v12 +; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2096,10 +1843,8 @@ ; ; RV64-LABEL: vreduce_and_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vredand.vs v8, v8, v12 +; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) @@ -2111,10 +1856,8 @@ define i64 @vreduce_or_nxv4i64( %v) { ; RV32-LABEL: vreduce_or_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v12, zero ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vredor.vs v8, v8, v12 +; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2125,8 +1868,7 @@ ; RV64-LABEL: vreduce_or_nxv4i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vredor.vs v8, v8, v12 +; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v)