Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2800,6 +2800,13 @@ return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG); } +static bool hasNonZeroAVL(SDValue AVL) { + auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL); + auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL); + return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) || + (ImmAVL && ImmAVL->getZExtValue() >= 1); +} + static MVT getLMUL2VT(MVT VT) { assert(VT.getVectorElementType().getSizeInBits() <= 64 && "Unexpected vector MVT"); @@ -2815,6 +2822,11 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { + // This is an unconditional insert - so if we can't prove that VL + // is greater than 0, we must force it to a non-zero value. + if (!hasNonZeroAVL(VL)) + VL = DAG.getConstant(1, DL, Subtarget.getXLenVT()); + SDValue Passthru = DAG.getUNDEF(VT); if (VT.isFloatingPoint()) // TODO: Use vmv.v.i for appropriate constants @@ -2828,6 +2840,7 @@ DAG.getConstant(1, DL, Subtarget.getXLenVT()), VT, DL, DAG, Subtarget); + // If the operand is a constant, sign extend to increase our chances // of being able to use a .vi instruction. ANY_EXTEND would become a // a zero extend and the simm5 check in isel would fail. @@ -5881,11 +5894,10 @@ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); - SDValue IdentitySplat = - lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT), - M1VT, DL, DAG, Subtarget); + SDValue IdentityElem = + lowerScalarInsert(NeutralElem, VL, M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, - IdentitySplat, Mask, VL); + IdentityElem, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, DAG.getConstant(0, DL, XLenVT)); return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); @@ -5943,11 +5955,10 @@ auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - SDValue ScalarSplat = - lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT), - M1VT, DL, DAG, Subtarget); + SDValue ScalarElem = + lowerScalarInsert(ScalarVal, VL, M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), - VectorVal, ScalarSplat, Mask, VL); + VectorVal, ScalarElem, Mask, VL); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, DAG.getConstant(0, DL, XLenVT)); } @@ -6011,11 +6022,10 @@ MVT XLenVT = Subtarget.getXLenVT(); MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT; - SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0), - DAG.getConstant(1, DL, XLenVT), M1VT, - DL, DAG, Subtarget); + SDValue StartElem = + lowerScalarInsert(Op.getOperand(0), VL, M1VT, DL, DAG, Subtarget); SDValue Reduction = - DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL); + DAG.getNode(RVVOpcode, DL, M1VT, StartElem, Vec, StartElem, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, DAG.getConstant(0, DL, XLenVT)); if (!VecVT.isInteger()) @@ -8006,7 +8016,8 @@ } // Try to fold (<bop> x, (reduction.<bop> vec, start)) -static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG) { +static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { auto BinOpToRVVReduce = [](unsigned Opc) { switch (Opc) { default: @@ -8068,7 +8079,7 @@ ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL) return SDValue(); - if (!isOneConstant(ScalarV.getOperand(2))) + if (!hasNonZeroAVL(ScalarV.getOperand(2))) return SDValue(); // Check the scalar of ScalarV is neutral element @@ -8080,20 +8091,11 @@ if (!ScalarV.hasOneUse()) return SDValue(); - EVT SplatVT = ScalarV.getValueType(); SDValue NewStart = N->getOperand(1 - ReduceIdx); - unsigned SplatOpc = RISCVISD::VFMV_S_F_VL; - if (SplatVT.isInteger()) { - auto *C = dyn_cast<ConstantSDNode>(NewStart.getNode()); - if (!C || C->isZero() || !isInt<5>(C->getSExtValue())) - SplatOpc = RISCVISD::VMV_S_X_VL; - else - SplatOpc = RISCVISD::VMV_V_X_VL; - } SDValue NewScalarV = - DAG.getNode(SplatOpc, SDLoc(N), SplatVT, ScalarV.getOperand(0), NewStart, - ScalarV.getOperand(2)); + lowerScalarInsert(NewStart, ScalarV.getOperand(2), ScalarV.getSimpleValueType(), + SDLoc(N), DAG, Subtarget); SDValue NewReduce = DAG.getNode(Reduce.getOpcode(), SDLoc(Reduce), Reduce.getValueType(), Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV, @@ -8295,7 +8297,7 @@ return V; if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) return V; - if (SDValue V = combineBinOpToReduce(N, DAG)) + if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; // fold (add (select lhs, rhs, cc, 0, y), x) -> // (select lhs, rhs, cc, x, (add x, y)) @@ -8449,7 +8451,7 @@ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); } - if (SDValue V = combineBinOpToReduce(N, DAG)) + if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; if (DCI.isAfterLegalizeDAG()) @@ -8465,7 +8467,7 @@ const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - if (SDValue V = combineBinOpToReduce(N, DAG)) + if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; if (DCI.isAfterLegalizeDAG()) @@ -8493,7 +8495,7 @@ DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1)); } - if (SDValue V = combineBinOpToReduce(N, DAG)) + if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; // fold (xor (select cond, 0, y), x) -> // (select cond, x, (xor x, y)) @@ -9713,7 +9715,7 @@ case ISD::SMIN: case ISD::FMAXNUM: case ISD::FMINNUM: - return combineBinOpToReduce(N, DAG); + return combineBinOpToReduce(N, DAG, Subtarget); case ISD::SETCC: return performSETCCCombine(N, DAG, Subtarget); case ISD::SIGN_EXTEND_INREG: Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 
-verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) @@ -159,9 +159,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -176,9 +174,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -195,9 +191,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -212,9 +206,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -233,11 +225,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfredusum.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -248,20 +238,16 @@ define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_v128f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v16, v16, v24 -; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, ft0 -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v8, v8, v16 +; CHECK-NEXT: vfredosum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -367,7 +353,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -385,7 +371,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; 
CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -497,7 +483,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -515,7 +501,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -563,7 +549,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 @@ -581,7 +567,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 @@ -602,9 +588,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -619,9 +603,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -636,7 +618,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v12 @@ -655,7 +637,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 @@ -678,11 +660,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfredusum.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x float>, <64 x float>* %x @@ -693,20 +673,16 @@ define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: li 
a2, 32 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v16, v16, v24 -; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, ft0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v8, v8, v16 +; CHECK-NEXT: vfredosum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x float>, <64 x float>* %x @@ -723,12 +699,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v24, v8 +; CHECK-NEXT: vfwadd.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v0, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x half>, <64 x half>* %x @@ -746,13 +722,13 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v16, a0 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v16, v16, v24 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v16 @@ -926,7 +902,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -944,7 +920,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -992,7 +968,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 @@ -1010,7 +986,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 @@ 
-1058,7 +1034,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v12 @@ -1076,7 +1052,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 @@ -1137,7 +1113,7 @@ ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v24, v8 @@ -1157,13 +1133,13 @@ ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v16, 16 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v16, v16, v24 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v16 @@ -1179,18 +1155,27 @@ declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) define half @vreduce_fmin_v2f16(<2 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI68_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI68_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI68_0) +; RV32-NEXT: flh ft0, %lo(.LCPI68_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v2f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI68_0) +; RV64-NEXT: flh ft0, %lo(.LCPI68_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x half>, <2 x half>* %x %red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v) ret half %red @@ -1199,54 +1184,81 @@ declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) define half @vreduce_fmin_v4f16(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI69_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI69_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; 
CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI69_0) +; RV32-NEXT: flh ft0, %lo(.LCPI69_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI69_0) +; RV64-NEXT: flh ft0, %lo(.LCPI69_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f16_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI70_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI70_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f16_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI70_0) +; RV32-NEXT: flh ft0, %lo(.LCPI70_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f16_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI70_0) +; RV64-NEXT: flh ft0, %lo(.LCPI70_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI71_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI71_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f16_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI71_0) +; RV32-NEXT: flh ft0, %lo(.LCPI71_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f16_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI71_0) +; RV64-NEXT: flh ft0, %lo(.LCPI71_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red @@ -1261,14 +1273,12 @@ ; CHECK-NEXT: vsetvli 
zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI72_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI72_0)(a1) ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, ft0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI72_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI72_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -1279,18 +1289,27 @@ declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) define float @vreduce_fmin_v2f32(<2 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI73_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI73_0) +; RV32-NEXT: flw ft0, %lo(.LCPI73_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI73_0) +; RV64-NEXT: flw ft0, %lo(.LCPI73_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x float>, <2 x float>* %x %red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v) ret float %red @@ -1299,54 +1318,81 @@ declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) define float @vreduce_fmin_v4f32(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI74_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI74_0) +; RV32-NEXT: flw ft0, %lo(.LCPI74_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI74_0) +; RV64-NEXT: flw ft0, %lo(.LCPI74_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmin_v4f32_nonans(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f32_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; 
CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI75_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI75_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f32_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI75_0) +; RV32-NEXT: flw ft0, %lo(.LCPI75_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f32_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI75_0) +; RV64-NEXT: flw ft0, %lo(.LCPI75_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmin_v4f32_nonans_noinfs(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI76_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI76_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f32_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI76_0) +; RV32-NEXT: flw ft0, %lo(.LCPI76_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f32_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI76_0) +; RV64-NEXT: flw ft0, %lo(.LCPI76_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red @@ -1360,20 +1406,18 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vle32.v v0, (a1) +; CHECK-NEXT: lui a0, %hi(.LCPI77_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI77_0)(a0) ; CHECK-NEXT: vfmin.vv v16, v24, v16 ; CHECK-NEXT: vfmin.vv v8, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI77_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI77_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1385,18 +1429,27 @@ declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) define double 
@vreduce_fmin_v2f64(<2 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI78_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI78_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI78_0) +; RV32-NEXT: fld ft0, %lo(.LCPI78_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI78_0) +; RV64-NEXT: fld ft0, %lo(.LCPI78_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x double>, <2 x double>* %x %red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v) ret double %red @@ -1405,54 +1458,81 @@ declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) define double @vreduce_fmin_v4f64(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI79_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI79_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI79_0) +; RV32-NEXT: fld ft0, %lo(.LCPI79_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI79_0) +; RV64-NEXT: fld ft0, %lo(.LCPI79_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f64_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI80_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI80_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f64_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI80_0) +; RV32-NEXT: fld ft0, %lo(.LCPI80_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f64_nonans: +; RV64: # %bb.0: +; 
RV64-NEXT: lui a1, %hi(.LCPI80_0) +; RV64-NEXT: fld ft0, %lo(.LCPI80_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI81_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI81_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f64_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI81_0) +; RV32-NEXT: fld ft0, %lo(.LCPI81_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f64_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI81_0) +; RV64-NEXT: fld ft0, %lo(.LCPI81_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red @@ -1461,21 +1541,33 @@ declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) define double @vreduce_fmin_v32f64(<32 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI82_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI82_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: lui a1, %hi(.LCPI82_0) +; RV32-NEXT: fld ft0, %lo(.LCPI82_0)(a1) +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vfmv.s.f v24, ft0 +; RV32-NEXT: vfmin.vv v8, v8, v16 +; RV32-NEXT: vfredmin.vs v8, v8, v24 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI82_0) +; RV64-NEXT: fld ft0, %lo(.LCPI82_0)(a1) +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vfmv.s.f v24, ft0 +; RV64-NEXT: vfmin.vv v8, v8, v16 +; RV64-NEXT: vfredmin.vs v8, v8, v24 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <32 x double>, <32 x double>* %x %red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v) ret double %red @@ -1484,18 +1576,27 @@ declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>) define half @vreduce_fmax_v2f16(<2 
x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI83_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI83_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI83_0) +; RV32-NEXT: flh ft0, %lo(.LCPI83_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v2f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI83_0) +; RV64-NEXT: flh ft0, %lo(.LCPI83_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x half>, <2 x half>* %x %red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v) ret half %red @@ -1504,54 +1605,81 @@ declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>) define half @vreduce_fmax_v4f16(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI84_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI84_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI84_0) +; RV32-NEXT: flh ft0, %lo(.LCPI84_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI84_0) +; RV64-NEXT: flh ft0, %lo(.LCPI84_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f16_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI85_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI85_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f16_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI85_0) +; RV32-NEXT: flh ft0, %lo(.LCPI85_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f16_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI85_0) +; RV64-NEXT: flh ft0, 
%lo(.LCPI85_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI86_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI86_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f16_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI86_0) +; RV32-NEXT: flh ft0, %lo(.LCPI86_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f16_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI86_0) +; RV64-NEXT: flh ft0, %lo(.LCPI86_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v) ret half %red @@ -1566,14 +1694,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI87_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI87_0)(a1) ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, ft0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI87_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI87_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -1584,18 +1710,27 @@ declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) define float @vreduce_fmax_v2f32(<2 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI88_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI88_0) +; RV32-NEXT: flw ft0, %lo(.LCPI88_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI88_0) +; RV64-NEXT: flw ft0, %lo(.LCPI88_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs 
v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x float>, <2 x float>* %x %red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v) ret float %red @@ -1604,54 +1739,81 @@ declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) define float @vreduce_fmax_v4f32(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI89_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI89_0) +; RV32-NEXT: flw ft0, %lo(.LCPI89_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI89_0) +; RV64-NEXT: flw ft0, %lo(.LCPI89_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmax_v4f32_nonans(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f32_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI90_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI90_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f32_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI90_0) +; RV32-NEXT: flw ft0, %lo(.LCPI90_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f32_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI90_0) +; RV64-NEXT: flw ft0, %lo(.LCPI90_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmax_v4f32_nonans_noinfs(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI91_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI91_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f32_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI91_0) +; RV32-NEXT: flw ft0, 
%lo(.LCPI91_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f32_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI91_0) +; RV64-NEXT: flw ft0, %lo(.LCPI91_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %red @@ -1665,20 +1827,18 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vle32.v v0, (a1) +; CHECK-NEXT: lui a0, %hi(.LCPI92_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI92_0)(a0) ; CHECK-NEXT: vfmax.vv v16, v24, v16 ; CHECK-NEXT: vfmax.vv v8, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI92_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI92_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1690,18 +1850,27 @@ declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) define double @vreduce_fmax_v2f64(<2 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI93_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI93_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI93_0) +; RV32-NEXT: fld ft0, %lo(.LCPI93_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI93_0) +; RV64-NEXT: fld ft0, %lo(.LCPI93_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x double>, <2 x double>* %x %red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v) ret double %red @@ -1710,54 +1879,81 @@ declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) define double @vreduce_fmax_v4f64(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI94_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI94_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; 
CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI94_0) +; RV32-NEXT: fld ft0, %lo(.LCPI94_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI94_0) +; RV64-NEXT: fld ft0, %lo(.LCPI94_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f64_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI95_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI95_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f64_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI95_0) +; RV32-NEXT: fld ft0, %lo(.LCPI95_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f64_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI95_0) +; RV64-NEXT: fld ft0, %lo(.LCPI95_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI96_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI96_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f64_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI96_0) +; RV32-NEXT: fld ft0, %lo(.LCPI96_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f64_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI96_0) +; RV64-NEXT: fld ft0, %lo(.LCPI96_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red @@ -1766,21 +1962,33 @@ declare double @llvm.vector.reduce.fmax.v32f64(<32 x 
double>) define double @vreduce_fmax_v32f64(<32 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI97_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI97_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: lui a1, %hi(.LCPI97_0) +; RV32-NEXT: fld ft0, %lo(.LCPI97_0)(a1) +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vfmv.s.f v24, ft0 +; RV32-NEXT: vfmax.vv v8, v8, v16 +; RV32-NEXT: vfredmax.vs v8, v8, v24 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI97_0) +; RV64-NEXT: fld ft0, %lo(.LCPI97_0)(a1) +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vfmv.s.f v24, ft0 +; RV64-NEXT: vfmax.vv v8, v8, v16 +; RV64-NEXT: vfredmax.vs v8, v8, v24 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <32 x double>, <32 x double>* %x %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v) ret double %red Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -88,9 +88,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -107,9 +105,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -126,9 +122,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -147,11 +141,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vredsum.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -222,7 +214,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma ; 
CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -240,7 +232,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -274,7 +266,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -292,7 +284,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -376,7 +368,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -394,7 +386,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -415,9 +407,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -432,7 +422,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 @@ -451,7 +441,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 @@ -472,9 +462,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -489,7 +477,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 @@ -508,7 +496,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; 
CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 @@ -531,11 +519,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vredsum.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -552,12 +538,12 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwadd.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -575,12 +561,12 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwaddu.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -652,7 +638,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -670,7 +656,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -754,7 +740,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -772,7 +758,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -806,7 +792,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, 
e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 @@ -824,7 +810,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 @@ -845,9 +831,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -862,7 +846,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 @@ -881,7 +865,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 @@ -904,11 +888,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vredsum.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -925,12 +907,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwadd.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -948,12 +930,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwaddu.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -1179,7 +1161,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: 
vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 @@ -1213,7 +1195,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 @@ -1276,7 +1258,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v10 @@ -1310,7 +1292,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v10 @@ -1373,7 +1355,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v12 @@ -1407,7 +1389,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v12 @@ -1484,7 +1466,7 @@ ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwadd.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, zero ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredsum.vs v8, v24, v8 @@ -1525,7 +1507,7 @@ ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwaddu.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, zero ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredsum.vs v8, v24, v8 @@ -1887,7 +1869,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 @@ -1905,7 +1887,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 @@ -1923,7 +1905,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 @@ -1941,9 +1923,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, 
e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1960,7 +1940,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -1979,7 +1959,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v12 @@ -1998,7 +1978,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2020,7 +2000,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2052,7 +2032,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 @@ -2070,7 +2050,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 @@ -2088,9 +2068,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2106,7 +2084,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -2125,7 +2103,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v12 @@ -2144,7 +2122,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; 
CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2166,7 +2144,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2198,7 +2176,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 @@ -2216,9 +2194,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2234,7 +2210,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -2252,7 +2228,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v12 @@ -2271,7 +2247,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2293,7 +2269,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2350,9 +2326,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -2383,7 +2357,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v10, -1 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v10 @@ -2416,7 +2390,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v12 @@ -2449,7 +2423,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; 
RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v16 @@ -2488,7 +2462,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v16 @@ -2539,7 +2513,7 @@ ; RV64-NEXT: vand.vv v16, v24, v16 ; RV64-NEXT: vand.vv v8, v8, v0 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v16 @@ -2636,9 +2610,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2655,9 +2627,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2674,9 +2644,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2695,11 +2663,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -2793,9 +2759,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2812,9 +2776,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2833,11 +2795,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -2931,9 +2891,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, 
m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2952,11 +2910,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -3272,9 +3228,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3291,9 +3245,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3310,9 +3262,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3331,11 +3281,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredxor.vs v8, v8, v16 +; CHECK-NEXT: vredxor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -3429,9 +3377,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3448,9 +3394,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3469,11 +3413,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredxor.vs v8, v8, v16 +; CHECK-NEXT: vredxor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -3567,9 +3509,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: 
vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3588,11 +3528,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredxor.vs v8, v8, v16 +; CHECK-NEXT: vredxor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -3913,9 +3851,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3933,9 +3869,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3953,9 +3887,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3974,12 +3906,10 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmin.vv v8, v8, v16 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredmin.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: vredmin.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -4127,9 +4057,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v12, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4141,9 +4069,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4162,9 +4088,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4176,9 +4100,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4197,13 +4119,11 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle16.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, 
e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vmv.s.x v24, a0 +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v24 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; @@ -4214,13 +4134,11 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle16.v v16, (a0) -; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vmv.s.x v24, a0 +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v24 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -4368,9 +4286,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4382,9 +4298,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4403,13 +4317,11 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle32.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vmv.s.x v24, a0 +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v24 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; @@ -4420,13 +4332,11 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle32.v v16, (a0) -; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vmv.s.x v24, a0 +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v24 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -4824,9 +4734,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4844,9 +4752,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4864,9 +4770,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4885,12 
+4789,10 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -4989,9 +4891,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5009,9 +4909,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5030,12 +4928,10 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -5134,9 +5030,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5155,12 +5049,10 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -5476,7 +5368,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 @@ -5494,7 +5386,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 @@ -5512,7 +5404,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 8, 
e8, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 @@ -5530,9 +5422,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5549,7 +5439,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -5568,7 +5458,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 @@ -5587,7 +5477,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5609,7 +5499,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5641,7 +5531,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 @@ -5659,7 +5549,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 @@ -5677,9 +5567,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5695,7 +5583,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -5714,7 +5602,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 @@ -5733,7 +5621,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, 
ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5755,7 +5643,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5787,7 +5675,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 @@ -5805,9 +5693,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5823,7 +5709,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -5841,7 +5727,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 @@ -5860,7 +5746,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5882,7 +5768,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5939,9 +5825,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5972,7 +5856,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v10, -1 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v10 @@ -6005,7 +5889,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v12 @@ -6038,7 +5922,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, 
e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 @@ -6077,7 +5961,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 @@ -6128,7 +6012,7 @@ ; RV64-NEXT: vminu.vv v16, v24, v16 ; RV64-NEXT: vminu.vv v8, v8, v0 ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 @@ -6225,9 +6109,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6244,9 +6126,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6263,9 +6143,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6284,11 +6162,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -6382,9 +6258,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6401,9 +6275,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6422,11 +6294,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x 
i16>* %x @@ -6520,9 +6390,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6541,11 +6409,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x Index: llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -4,7 +4,7 @@ define i64 @reduce_add(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_add: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 @@ -19,7 +19,7 @@ define i64 @reduce_add2(<4 x i64> %v) { ; CHECK-LABEL: reduce_add2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 @@ -34,7 +34,7 @@ define i64 @reduce_and(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_and: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -49,7 +49,7 @@ define i64 @reduce_and2(<4 x i64> %v) { ; CHECK-LABEL: reduce_and2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -64,7 +64,7 @@ define i64 @reduce_or(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_or: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v10 @@ -79,7 +79,7 @@ define i64 @reduce_or2(<4 x i64> %v) { ; CHECK-LABEL: reduce_or2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v10 @@ -94,7 +94,7 @@ define i64 @reduce_xor(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_xor: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v10 @@ -109,7 +109,7 @@ define i64 @reduce_xor2(<4 x i64> %v) { ; CHECK-LABEL: reduce_xor2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, 
e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v10 @@ -125,7 +125,7 @@ define i64 @reduce_umax(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_umax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 @@ -140,7 +140,7 @@ define i64 @reduce_umax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 @@ -155,7 +155,7 @@ define i64 @reduce_umin(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_umin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -170,7 +170,7 @@ define i64 @reduce_umin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -185,7 +185,7 @@ define i64 @reduce_smax(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_smax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v10 @@ -200,7 +200,7 @@ define i64 @reduce_smax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v10 @@ -215,7 +215,7 @@ define i64 @reduce_smin(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_smin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v10 @@ -230,7 +230,7 @@ define i64 @reduce_smin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v10 @@ -245,9 +245,8 @@ define float @reduce_fadd(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -259,9 +258,8 @@ define float @reduce_fadd2(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret 
@@ -274,9 +272,8 @@
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -289,9 +286,8 @@
define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
Index: llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -9,7 +9,7 @@
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v9
@@ -22,7 +22,7 @@
define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v9
@@ -37,7 +37,7 @@
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v9
@@ -50,7 +50,7 @@
define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v9
@@ -65,9 +65,8 @@
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -78,9 +77,8 @@
define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -93,7 +91,7 @@
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v9
@@ -106,7 +104,7 @@
define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v9
@@ -119,7 +117,7 @@
define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
@@ -134,7 +132,7 @@
define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
@@ -151,9 +149,8 @@
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -164,9 +161,8 @@
define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -177,9 +173,9 @@
define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -192,9 +188,9 @@
define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -209,7 +205,7 @@
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
@@ -222,7 +218,7 @@
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
@@ -235,7 +231,7 @@
define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
@@ -250,7 +246,7 @@
define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
@@ -267,9 +263,8 @@
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -280,9 +275,8 @@
define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -293,9 +287,9 @@
define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -308,9 +302,9 @@
define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -325,7 +319,7 @@
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
@@ -338,7 +332,7 @@
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
@@ -351,7 +345,7 @@
define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
@@ -366,7 +360,7 @@
define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
@@ -383,7 +377,7 @@
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v12
@@ -396,7 +390,7 @@
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
@@ -409,7 +403,7 @@
define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
@@ -424,7 +418,7 @@
define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
@@ -442,9 +436,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI30_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -457,9 +451,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI31_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI31_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -472,9 +466,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI32_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI32_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -489,9 +483,9 @@
; CHECK-LABEL: vreduce_fmin_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI33_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI33_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -506,10 +500,9 @@
; CHECK-LABEL: vreduce_fmin_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI34_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI34_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -522,13 +515,11 @@
define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI35_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI35_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -542,9 +533,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI36_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI36_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -557,9 +548,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI37_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI37_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -572,9 +563,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI38_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI38_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -589,10 +580,9 @@
; CHECK-LABEL: vreduce_fmin_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI39_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI39_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -606,9 +596,9 @@
; CHECK-LABEL: vreduce_fmin_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v10, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -622,13 +612,11 @@
define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmin_nxv32f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI41_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI41_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -642,10 +630,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI42_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI42_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -657,10 +644,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI43_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI43_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -672,10 +658,9 @@
; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI44_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI44_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -689,9 +674,9 @@
; CHECK-LABEL: vreduce_fmin_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI45_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI45_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -706,9 +691,9 @@
; CHECK-LABEL: vreduce_fmin_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v12, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI46_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -722,13 +707,11 @@
define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmin_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI47_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI47_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -742,9 +725,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI48_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI48_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -757,9 +740,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI49_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI49_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -772,9 +755,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI50_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI50_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -789,9 +772,9 @@
; CHECK-LABEL: vreduce_fmax_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI51_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI51_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -806,10 +789,9 @@
; CHECK-LABEL: vreduce_fmax_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI52_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v9, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI52_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -822,13 +804,11 @@
define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI53_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vlse16.v v16, (a0), zero
+; CHECK-NEXT: flh ft0, %lo(.LCPI53_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -842,9 +822,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI54_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI54_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -857,9 +837,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI55_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI55_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -872,9 +852,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI56_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI56_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -889,10 +869,9 @@
; CHECK-LABEL: vreduce_fmax_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI57_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v9, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI57_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -906,9 +885,9 @@
; CHECK-LABEL: vreduce_fmax_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v10, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI58_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -922,13 +901,11 @@
define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
; CHECK-LABEL: vreduce_fmax_nxv32f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI59_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI59_0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vlse32.v v16, (a0), zero
+; CHECK-NEXT: flw ft0, %lo(.LCPI59_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -942,10 +919,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI60_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI60_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -957,10 +933,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI61_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI61_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -972,10 +947,9 @@
; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI62_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v9, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI62_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -989,9 +963,9 @@
; CHECK-LABEL: vreduce_fmax_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI63_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI63_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v10, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI63_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -1006,9 +980,9 @@
; CHECK-LABEL: vreduce_fmax_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI64_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI64_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v12, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI64_0)(a0)
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -1022,13 +996,11 @@
define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
; CHECK-LABEL: vreduce_fmax_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
; CHECK-NEXT: lui a0, %hi(.LCPI65_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: fld ft0, %lo(.LCPI65_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: vfmv.s.f v16, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v16
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1039,7 +1011,7 @@
define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v9
@@ -1066,9 +1038,8 @@
; CHECK-NEXT: vfmv.v.f v9, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1090,7 +1061,7 @@
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v10
@@ -1118,7 +1089,7 @@
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
@@ -1133,11 +1104,10 @@
define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: fmv.h.x ft0, zero
; CHECK-NEXT: fneg.h ft0, ft0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.v.f v11, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v8, v8, v12
@@ -1162,9 +1132,8 @@
; CHECK-NEXT: vfmv.v.f v9, ft0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a1
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1184,7 +1153,7 @@
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v8, v8, v10
@@ -1212,7 +1181,7 @@
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v8, v8, v12
@@ -1229,9 +1198,8 @@
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vfmv.v.f v11, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfredmax.vs v8, v8, v12
Index: llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
+++ llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
@@ -9,7 +9,7 @@
define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
@@ -24,7 +24,7 @@
define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
@@ -40,7 +40,7 @@
; CHECK-LABEL: vreduce_smax_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
@@ -55,7 +55,7 @@
define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
@@ -71,7 +71,7 @@
; CHECK-LABEL: vreduce_smin_nxv1i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
@@ -86,7 +86,7 @@
define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
@@ -101,7 +101,7 @@
define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v9
@@ -116,7 +116,7 @@
define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
@@ -131,7 +131,7 @@
define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
@@ -146,7 +146,7 @@
define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
@@ -162,7 +162,7 @@
; CHECK-LABEL: vreduce_smax_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
@@ -177,7 +177,7 @@
define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
@@ -193,7 +193,7 @@
; CHECK-LABEL: vreduce_smin_nxv2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
@@ -208,7 +208,7 @@
define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
@@ -223,7 +223,7 @@
define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v9
@@ -238,7 +238,7 @@
define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
@@ -253,7 +253,7 @@
define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_add_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
@@ -268,7 +268,7 @@
define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
@@ -284,7 +284,7 @@
; CHECK-LABEL: vreduce_smax_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -128
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
@@ -299,7 +299,7 @@
define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
@@ -315,7 +315,7 @@
; CHECK-LABEL: vreduce_smin_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredmin.vs v8, v8, v9
@@ -330,7 +330,7 @@
define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_and_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
@@ -345,7 +345,7 @@
define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_or_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v9
@@ -360,7 +360,7 @@
define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
@@ -375,7 +375,7 @@
define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
@@ -388,7 +388,7 @@
define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -403,7 +403,7 @@
define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -420,7 +420,7 @@
define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
@@ -436,7 +436,7 @@
; CHECK-LABEL: vreduce_smax_nxv1i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
@@ -451,7 +451,7 @@
define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
@@ -468,7 +468,7 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
@@ -479,7 +479,7 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
@@ -494,7 +494,7 @@
define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
@@ -509,7 +509,7 @@
define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v9
@@ -524,7 +524,7 @@
define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
@@ -539,7 +539,7 @@
define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
@@ -552,7 +552,7 @@
define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -567,7 +567,7 @@
define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -584,7 +584,7 @@
define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
@@ -600,7 +600,7 @@
; CHECK-LABEL: vreduce_smax_nxv2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
@@ -615,7 +615,7 @@
define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
@@ -632,7 +632,7 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
@@ -643,7 +643,7 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
@@ -658,7 +658,7 @@
define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
@@ -673,7 +673,7 @@
define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v9
@@ -688,7 +688,7 @@
define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
@@ -703,9 +703,8 @@
define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -716,9 +715,9 @@
define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -731,9 +730,9 @@
define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -748,9 +747,8 @@
define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -764,9 +762,8 @@
; CHECK-LABEL: vreduce_smax_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -779,9 +776,8 @@
define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -796,9 +792,8 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 8
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
@@ -807,9 +802,8 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 8
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -822,9 +816,8 @@
define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_and_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -837,9 +830,8 @@
define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_or_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -852,9 +844,8 @@
define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -867,7 +858,7 @@
define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v9
@@ -880,7 +871,7 @@
define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -895,7 +886,7 @@
define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
@@ -912,7 +903,7 @@
define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
@@ -928,7 +919,7 @@
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
@@ -943,7 +934,7 @@
define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v9
@@ -960,7 +951,7 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
@@ -971,7 +962,7 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
@@ -986,7 +977,7 @@
define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v9
@@ -1001,7 +992,7 @@
define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v9
@@ -1016,7 +1007,7 @@
define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
@@ -1031,9 +1022,8 @@
define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1044,9 +1034,9 @@
define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -1059,9 +1049,9 @@
define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -1076,9 +1066,8 @@
define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1092,9 +1081,8 @@
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1107,9 +1095,8 @@
define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1124,9 +1111,8 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
@@ -1135,9 +1121,8 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1150,9 +1135,8 @@
define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1165,9 +1149,8 @@
define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1180,9 +1163,8 @@
define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1195,7 +1177,7 @@
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
@@ -1208,7 +1190,7 @@
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -1223,7 +1205,7 @@
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
@@ -1240,7 +1222,7 @@
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
@@ -1256,7 +1238,7 @@
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
@@ -1271,7 +1253,7 @@
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
@@ -1288,7 +1270,7 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v10
@@ -1299,7 +1281,7 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
@@ -1314,7 +1296,7 @@
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
@@ -1329,7 +1311,7 @@
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
@@ -1344,7 +1326,7 @@
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
@@ -1372,9 +1354,8 @@
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredsum.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1399,9 +1380,9 @@
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1428,9 +1409,9 @@
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1458,9 +1439,8 @@
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredmaxu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1495,9 +1475,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1523,9 +1502,8 @@
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1562,9 +1540,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1590,9 +1567,8 @@
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1618,9 +1594,8 @@
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1646,9 +1621,8 @@
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1674,7 +1648,7 @@
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v10
@@ -1701,7 +1675,7 @@
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
@@ -1730,7 +1704,7 @@
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
@@ -1760,7 +1734,7 @@
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v10
@@ -1797,7 +1771,7 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v10
@@ -1825,7 +1799,7 @@
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
@@ -1864,7 +1838,7 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
@@ -1892,7 +1866,7 @@
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v10
@@ -1920,7 +1894,7 @@
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v10
@@ -1948,7 +1922,7 @@
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v10
@@ -1976,7 +1950,7 @@
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:
vredsum.vs v8, v8, v12 @@ -2003,7 +1977,7 @@ ; ; RV64-LABEL: vwreduce_add_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v10 @@ -2032,7 +2006,7 @@ ; ; RV64-LABEL: vwreduce_uadd_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v10 @@ -2062,7 +2036,7 @@ ; ; RV64-LABEL: vreduce_umax_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredmaxu.vs v8, v8, v12 @@ -2099,7 +2073,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredmax.vs v8, v8, v12 @@ -2127,7 +2101,7 @@ ; ; RV64-LABEL: vreduce_umin_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v12 @@ -2166,7 +2140,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v12 @@ -2194,7 +2168,7 @@ ; ; RV64-LABEL: vreduce_and_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v12 @@ -2222,7 +2196,7 @@ ; ; RV64-LABEL: vreduce_or_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredor.vs v8, v8, v12 @@ -2250,7 +2224,7 @@ ; ; RV64-LABEL: vreduce_xor_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vredxor.vs v8, v8, v12