diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5853,18 +5853,22 @@
   const MVT VecVT = Vec.getSimpleValueType();
   const MVT M1VT = getLMUL1VT(VecVT);
   const MVT XLenVT = Subtarget.getXLenVT();
+  const bool NonZeroAVL = hasNonZeroAVL(VL);
 
   // The reduction needs an LMUL1 input; do the splat at either LMUL1
   // or the original VT if fractional.
   auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
-  SDValue InitialValue =
-      lowerScalarInsert(StartValue, DAG.getConstant(1, DL, XLenVT),
-                        InnerVT, DL, DAG, Subtarget);
+  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
+  // prove it is non-zero. For the AVL=0 case, we need the scalar to
+  // be the result of the reduction operation.
+  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
+  SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
+                                           DAG, Subtarget);
   if (M1VT != InnerVT)
     InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
                                DAG.getUNDEF(M1VT),
                                InitialValue, DAG.getConstant(0, DL, XLenVT));
-  SDValue PassThru = hasNonZeroAVL(VL) ? DAG.getUNDEF(M1VT) : InitialValue;
+  SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, PassThru, Vec,
                                   InitialValue, Mask, VL);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
@@ -8068,7 +8072,7 @@
       ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
     return SDValue();
 
-  if (!isOneConstant(ScalarV.getOperand(2)))
+  if (!hasNonZeroAVL(ScalarV.getOperand(2)))
     return SDValue();
 
   // Check the scalar of ScalarV is neutral element
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,RV64 %s
 
 declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
 
@@ -159,9 +159,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vfredusum.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -176,9 +174,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vfredosum.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -195,9 +191,7 @@
 ; CHECK-NEXT: li a1, 64
 ;
CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -212,9 +206,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -233,11 +225,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfredusum.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -248,20 +238,16 @@ define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_v128f16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v16, v16, v24 -; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, ft0 -; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v8, v8, v16 +; CHECK-NEXT: vfredosum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -494,7 +480,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -512,7 +498,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -560,7 +546,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 @@ -578,7 +564,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 @@ -599,9 +585,7 @@ ; 
CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -616,9 +600,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -633,7 +615,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v12 @@ -652,7 +634,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 @@ -675,11 +657,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfredusum.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x float>, <64 x float>* %x @@ -690,20 +670,16 @@ define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v16, v16, v24 -; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, ft0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v8, v8, v16 +; CHECK-NEXT: vfredosum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x float>, <64 x float>* %x @@ -720,12 +696,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v24, v8 +; CHECK-NEXT: vfwadd.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v0, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 
x half>, <64 x half>* %x @@ -743,13 +719,13 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v16, a0 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v16, v16, v24 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v16 @@ -923,7 +899,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -941,7 +917,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -989,7 +965,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 @@ -1007,7 +983,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 @@ -1055,7 +1031,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v12 @@ -1073,7 +1049,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 @@ -1134,7 +1110,7 @@ ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v24, v8 @@ -1154,13 +1130,13 @@ ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v16, 16 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v16, v16, v24 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, 
ma +; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v16 @@ -1176,18 +1152,27 @@ declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) define half @vreduce_fmin_v2f16(<2 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI68_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI68_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI68_0) +; RV32-NEXT: flh ft0, %lo(.LCPI68_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v2f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI68_0) +; RV64-NEXT: flh ft0, %lo(.LCPI68_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x half>, <2 x half>* %x %red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v) ret half %red @@ -1196,54 +1181,81 @@ declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) define half @vreduce_fmin_v4f16(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI69_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI69_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI69_0) +; RV32-NEXT: flh ft0, %lo(.LCPI69_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI69_0) +; RV64-NEXT: flh ft0, %lo(.LCPI69_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f16_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI70_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI70_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f16_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI70_0) +; RV32-NEXT: flh 
ft0, %lo(.LCPI70_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f16_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI70_0) +; RV64-NEXT: flh ft0, %lo(.LCPI70_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI71_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI71_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f16_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI71_0) +; RV32-NEXT: flh ft0, %lo(.LCPI71_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f16_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI71_0) +; RV64-NEXT: flh ft0, %lo(.LCPI71_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v) ret half %red @@ -1258,14 +1270,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI72_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI72_0)(a1) ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, ft0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI72_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI72_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -1276,18 +1286,27 @@ declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) define float @vreduce_fmin_v2f32(<2 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI73_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI73_0) +; RV32-NEXT: flw ft0, %lo(.LCPI73_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s 
fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI73_0) +; RV64-NEXT: flw ft0, %lo(.LCPI73_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x float>, <2 x float>* %x %red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v) ret float %red @@ -1296,54 +1315,81 @@ declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) define float @vreduce_fmin_v4f32(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI74_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI74_0) +; RV32-NEXT: flw ft0, %lo(.LCPI74_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI74_0) +; RV64-NEXT: flw ft0, %lo(.LCPI74_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmin_v4f32_nonans(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f32_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI75_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI75_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f32_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI75_0) +; RV32-NEXT: flw ft0, %lo(.LCPI75_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f32_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI75_0) +; RV64-NEXT: flw ft0, %lo(.LCPI75_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmin_v4f32_nonans_noinfs(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI76_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI76_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli 
zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f32_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI76_0) +; RV32-NEXT: flw ft0, %lo(.LCPI76_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f32_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI76_0) +; RV64-NEXT: flw ft0, %lo(.LCPI76_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %red @@ -1357,20 +1403,18 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vle32.v v0, (a1) +; CHECK-NEXT: lui a0, %hi(.LCPI77_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI77_0)(a0) ; CHECK-NEXT: vfmin.vv v16, v24, v16 ; CHECK-NEXT: vfmin.vv v8, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI77_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI77_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1382,18 +1426,27 @@ declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) define double @vreduce_fmin_v2f64(<2 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI78_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI78_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI78_0) +; RV32-NEXT: fld ft0, %lo(.LCPI78_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI78_0) +; RV64-NEXT: fld ft0, %lo(.LCPI78_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x double>, <2 x double>* %x %red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v) ret double %red @@ -1402,54 +1455,81 @@ declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) define double @vreduce_fmin_v4f64(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; 
CHECK-NEXT: lui a0, %hi(.LCPI79_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI79_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI79_0) +; RV32-NEXT: fld ft0, %lo(.LCPI79_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI79_0) +; RV64-NEXT: fld ft0, %lo(.LCPI79_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f64_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI80_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI80_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f64_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI80_0) +; RV32-NEXT: fld ft0, %lo(.LCPI80_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f64_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI80_0) +; RV64-NEXT: fld ft0, %lo(.LCPI80_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmin.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI81_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI81_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v4f64_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI81_0) +; RV32-NEXT: fld ft0, %lo(.LCPI81_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmin.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v4f64_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI81_0) +; RV64-NEXT: fld ft0, %lo(.LCPI81_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; 
RV64-NEXT: vfredmin.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v) ret double %red @@ -1458,21 +1538,33 @@ declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) define double @vreduce_fmin_v32f64(<32 x double>* %x) { -; CHECK-LABEL: vreduce_fmin_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI82_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI82_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmin_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: lui a1, %hi(.LCPI82_0) +; RV32-NEXT: fld ft0, %lo(.LCPI82_0)(a1) +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vfmv.s.f v24, ft0 +; RV32-NEXT: vfmin.vv v8, v8, v16 +; RV32-NEXT: vfredmin.vs v8, v8, v24 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmin_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI82_0) +; RV64-NEXT: fld ft0, %lo(.LCPI82_0)(a1) +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vfmv.s.f v24, ft0 +; RV64-NEXT: vfmin.vv v8, v8, v16 +; RV64-NEXT: vfredmin.vs v8, v8, v24 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <32 x double>, <32 x double>* %x %red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v) ret double %red @@ -1481,18 +1573,27 @@ declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>) define half @vreduce_fmax_v2f16(<2 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI83_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI83_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v2f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI83_0) +; RV32-NEXT: flh ft0, %lo(.LCPI83_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v2f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI83_0) +; RV64-NEXT: flh ft0, %lo(.LCPI83_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x half>, <2 x half>* %x %red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v) ret half %red @@ -1501,54 +1602,81 @@ declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>) define half @vreduce_fmax_v4f16(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, 
%hi(.LCPI84_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI84_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI84_0) +; RV32-NEXT: flh ft0, %lo(.LCPI84_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI84_0) +; RV64-NEXT: flh ft0, %lo(.LCPI84_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f16_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI85_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI85_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f16_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI85_0) +; RV32-NEXT: flh ft0, %lo(.LCPI85_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f16_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI85_0) +; RV64-NEXT: flh ft0, %lo(.LCPI85_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v) ret half %red } define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI86_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI86_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f16_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI86_0) +; RV32-NEXT: flh ft0, %lo(.LCPI86_0)(a1) +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f16_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI86_0) +; RV64-NEXT: flh ft0, %lo(.LCPI86_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: 
vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v) ret half %red @@ -1563,14 +1691,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI87_0) +; CHECK-NEXT: flh ft0, %lo(.LCPI87_0)(a1) ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v24, ft0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI87_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI87_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -1581,18 +1707,27 @@ declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) define float @vreduce_fmax_v2f32(<2 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI88_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI88_0) +; RV32-NEXT: flw ft0, %lo(.LCPI88_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI88_0) +; RV64-NEXT: flw ft0, %lo(.LCPI88_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x float>, <2 x float>* %x %red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v) ret float %red @@ -1601,54 +1736,81 @@ declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) define float @vreduce_fmax_v4f32(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI89_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI89_0) +; RV32-NEXT: flw ft0, %lo(.LCPI89_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI89_0) +; RV64-NEXT: flw ft0, %lo(.LCPI89_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x 
float> %v) ret float %red } define float @vreduce_fmax_v4f32_nonans(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f32_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI90_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI90_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f32_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI90_0) +; RV32-NEXT: flw ft0, %lo(.LCPI90_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f32_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI90_0) +; RV64-NEXT: flw ft0, %lo(.LCPI90_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %red } define float @vreduce_fmax_v4f32_nonans_noinfs(<4 x float>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI91_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI91_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f32_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI91_0) +; RV32-NEXT: flw ft0, %lo(.LCPI91_0)(a1) +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f32_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI91_0) +; RV64-NEXT: flw ft0, %lo(.LCPI91_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %red @@ -1662,20 +1824,18 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a2) +; CHECK-NEXT: vle32.v v0, (a1) +; CHECK-NEXT: lui a0, %hi(.LCPI92_0) +; CHECK-NEXT: flw ft0, %lo(.LCPI92_0)(a0) ; CHECK-NEXT: vfmax.vv v16, v24, v16 ; CHECK-NEXT: vfmax.vv v8, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI92_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI92_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfmv.s.f 
v16, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1687,18 +1847,27 @@ declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) define double @vreduce_fmax_v2f64(<2 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI93_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI93_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI93_0) +; RV32-NEXT: fld ft0, %lo(.LCPI93_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v9, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI93_0) +; RV64-NEXT: fld ft0, %lo(.LCPI93_0)(a1) +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v9, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <2 x double>, <2 x double>* %x %red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v) ret double %red @@ -1707,54 +1876,81 @@ declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) define double @vreduce_fmax_v4f64(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI94_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI94_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI94_0) +; RV32-NEXT: fld ft0, %lo(.LCPI94_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI94_0) +; RV64-NEXT: fld ft0, %lo(.LCPI94_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f64_nonans: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI95_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI95_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f64_nonans: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI95_0) +; RV32-NEXT: fld ft0, %lo(.LCPI95_0)(a1) +; RV32-NEXT: vle64.v 
v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f64_nonans: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI95_0) +; RV64-NEXT: fld ft0, %lo(.LCPI95_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red } define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI96_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI96_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v4f64_nonans_noinfs: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: lui a1, %hi(.LCPI96_0) +; RV32-NEXT: fld ft0, %lo(.LCPI96_0)(a1) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vfmv.s.f v10, ft0 +; RV32-NEXT: vfredmax.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v4f64_nonans_noinfs: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI96_0) +; RV64-NEXT: fld ft0, %lo(.LCPI96_0)(a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vfmv.s.f v10, ft0 +; RV64-NEXT: vfredmax.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <4 x double>, <4 x double>* %x %red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v) ret double %red @@ -1763,21 +1959,33 @@ declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>) define double @vreduce_fmax_v32f64(<32 x double>* %x) { -; CHECK-LABEL: vreduce_fmax_v32f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: lui a0, %hi(.LCPI97_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI97_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fmax_v32f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: lui a1, %hi(.LCPI97_0) +; RV32-NEXT: fld ft0, %lo(.LCPI97_0)(a1) +; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: vfmv.s.f v24, ft0 +; RV32-NEXT: vfmax.vv v8, v8, v16 +; RV32-NEXT: vfredmax.vs v8, v8, v24 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fmax_v32f64: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, %hi(.LCPI97_0) +; RV64-NEXT: fld ft0, %lo(.LCPI97_0)(a1) +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vfmv.s.f v24, ft0 +; RV64-NEXT: vfmax.vv v8, v8, v16 +; RV64-NEXT: vfredmax.vs v8, v8, v24 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: ret %v = load <32 x double>, <32 x double>* %x %red = 
call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v) ret double %red diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -88,9 +88,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -107,9 +105,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -126,9 +122,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -147,11 +141,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vredsum.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -372,7 +364,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -390,7 +382,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -411,9 +403,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -428,7 +418,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 @@ -447,7 +437,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 @@ -468,9 +458,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, 
a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -485,7 +473,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 @@ -504,7 +492,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 @@ -527,11 +515,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vredsum.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -548,12 +534,12 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwadd.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -571,12 +557,12 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwaddu.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -748,7 +734,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 @@ -766,7 +752,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 @@ -800,7 +786,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; 
CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 @@ -818,7 +804,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 @@ -839,9 +825,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -856,7 +840,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 @@ -875,7 +859,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 @@ -898,11 +882,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v8, v16 +; CHECK-NEXT: vredsum.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -919,12 +901,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwadd.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -942,12 +924,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v8 +; CHECK-NEXT: vwaddu.vv v0, v8, v16 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v0, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -1173,7 +1155,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, 
m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 @@ -1207,7 +1189,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 @@ -1270,7 +1252,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v10 @@ -1304,7 +1286,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v10 @@ -1367,7 +1349,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v12 @@ -1401,7 +1383,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v12 @@ -1478,7 +1460,7 @@ ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwadd.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, zero ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredsum.vs v8, v24, v8 @@ -1519,7 +1501,7 @@ ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwaddu.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, zero ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredsum.vs v8, v24, v8 @@ -1881,9 +1863,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1899,9 +1879,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1917,9 +1895,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1935,9 +1911,7 @@ ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1954,7 +1928,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -1973,7 +1947,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v12 @@ -1992,7 +1966,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2014,7 +1988,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2046,9 +2020,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2064,9 +2036,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2082,9 +2052,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2100,7 +2068,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -2119,7 +2087,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v12 @@ -2138,7 +2106,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli 
zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2160,7 +2128,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2192,9 +2160,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2210,9 +2176,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2228,7 +2192,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -2246,7 +2210,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v12 @@ -2265,7 +2229,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2287,7 +2251,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vand.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v16 @@ -2344,9 +2308,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -2377,7 +2339,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v10, -1 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v10 @@ -2410,7 +2372,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v12 @@ -2443,7 +2405,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 
16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v16 @@ -2482,7 +2444,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v16 @@ -2533,7 +2495,7 @@ ; RV64-NEXT: vand.vv v16, v24, v16 ; RV64-NEXT: vand.vv v8, v8, v0 ; RV64-NEXT: vand.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v16 @@ -2630,9 +2592,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2649,9 +2609,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2668,9 +2626,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2689,11 +2645,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -2787,9 +2741,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2806,9 +2758,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2827,11 +2777,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -2925,9 +2873,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: 
vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2946,11 +2892,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredor.vs v8, v8, v16 +; CHECK-NEXT: vredor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -3266,9 +3210,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3285,9 +3227,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3304,9 +3244,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3325,11 +3263,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredxor.vs v8, v8, v16 +; CHECK-NEXT: vredxor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -3423,9 +3359,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3442,9 +3376,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3463,11 +3395,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredxor.vs v8, v8, v16 +; CHECK-NEXT: vredxor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -3561,9 +3491,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, 
ma ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3582,11 +3510,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredxor.vs v8, v8, v16 +; CHECK-NEXT: vredxor.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -3907,9 +3833,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3927,9 +3851,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3947,9 +3869,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3968,12 +3888,10 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmin.vv v8, v8, v16 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredmin.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmin.vv v8, v8, v16 +; CHECK-NEXT: vredmin.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -4121,9 +4039,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v12, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4135,9 +4051,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4156,9 +4070,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4170,9 +4082,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4191,13 +4101,11 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle16.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: 
vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vmv.s.x v24, a0 +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v24 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; @@ -4208,13 +4116,11 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle16.v v16, (a0) -; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vmv.s.x v24, a0 +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v24 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -4362,9 +4268,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4376,9 +4280,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4397,13 +4299,11 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle32.v v16, (a0) -; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vmv.s.x v24, a0 +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v24 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; @@ -4414,13 +4314,11 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle32.v v16, (a0) -; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-NEXT: vredmin.vs v8, v8, v16 +; RV64-NEXT: vmv.s.x v24, a0 +; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: vredmin.vs v8, v8, v24 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -4818,9 +4716,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4838,9 +4734,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4858,9 +4752,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret 
@@ -4879,12 +4771,10 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -4983,9 +4873,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5003,9 +4891,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5024,12 +4910,10 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -5128,9 +5012,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5149,12 +5031,10 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredmax.vs v8, v8, v16 +; CHECK-NEXT: vmv.s.x v24, a0 +; CHECK-NEXT: vmax.vv v8, v8, v16 +; CHECK-NEXT: vredmax.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -5470,9 +5350,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5488,9 +5366,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5506,9 +5382,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: 
vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5524,9 +5398,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5543,7 +5415,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -5562,7 +5434,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 @@ -5581,7 +5453,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5603,7 +5475,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5635,9 +5507,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5653,9 +5523,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5671,9 +5539,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5689,7 +5555,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -5708,7 +5574,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 @@ -5727,7 +5593,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli 
zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5749,7 +5615,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5781,9 +5647,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5799,9 +5663,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5817,7 +5679,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -5835,7 +5697,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 @@ -5854,7 +5716,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5876,7 +5738,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 @@ -5933,9 +5795,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5966,7 +5826,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v10, -1 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v10 @@ -5999,7 +5859,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v12 @@ -6032,7 +5892,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 @@ -6071,7 +5931,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 @@ -6122,7 +5982,7 @@ ; RV64-NEXT: vminu.vv v16, v24, v16 ; RV64-NEXT: vminu.vv v8, v8, v0 ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 @@ -6219,9 +6079,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6238,9 +6096,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6257,9 +6113,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6278,11 +6132,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -6376,9 +6228,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6395,9 +6245,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6416,11 +6264,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load 
<128 x i16>, <128 x i16>* %x @@ -6514,9 +6360,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6535,11 +6379,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vredmaxu.vs v8, v8, v16 +; CHECK-NEXT: vredmaxu.vs v8, v8, v24 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll --- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -4,7 +4,7 @@ define i64 @reduce_add(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_add: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 @@ -19,7 +19,7 @@ define i64 @reduce_add2(<4 x i64> %v) { ; CHECK-LABEL: reduce_add2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredsum.vs v8, v8, v10 @@ -34,7 +34,7 @@ define i64 @reduce_and(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_and: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -49,7 +49,7 @@ define i64 @reduce_and2(<4 x i64> %v) { ; CHECK-LABEL: reduce_and2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v10 @@ -64,7 +64,7 @@ define i64 @reduce_or(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_or: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v10 @@ -79,7 +79,7 @@ define i64 @reduce_or2(<4 x i64> %v) { ; CHECK-LABEL: reduce_or2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredor.vs v8, v8, v10 @@ -94,7 +94,7 @@ define i64 @reduce_xor(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_xor: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v10 @@ -109,7 +109,7 @@ define i64 @reduce_xor2(<4 x i64> %v) { ; CHECK-LABEL: reduce_xor2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: 
vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredxor.vs v8, v8, v10 @@ -125,7 +125,7 @@ define i64 @reduce_umax(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_umax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 @@ -140,7 +140,7 @@ define i64 @reduce_umax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 @@ -155,7 +155,7 @@ define i64 @reduce_umin(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_umin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -170,7 +170,7 @@ define i64 @reduce_umin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 @@ -185,7 +185,7 @@ define i64 @reduce_smax(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_smax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v10 @@ -200,7 +200,7 @@ define i64 @reduce_smax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v10 @@ -215,7 +215,7 @@ define i64 @reduce_smin(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_smin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v10 @@ -230,7 +230,7 @@ define i64 @reduce_smin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v10 @@ -245,9 +245,8 @@ define float @reduce_fadd(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -259,9 +258,8 @@ define float @reduce_fadd2(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; 
CHECK-NEXT: ret @@ -274,9 +272,8 @@ define float @reduce_fmax(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fmax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -289,9 +286,8 @@ define float @reduce_fmin(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fmin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -9,9 +9,8 @@ define half @vreduce_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -22,9 +21,8 @@ define half @vreduce_ord_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -37,9 +35,8 @@ define half @vreduce_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -50,9 +47,8 @@ define half @vreduce_ord_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -65,9 +61,8 @@ define half @vreduce_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -78,9 +73,8 @@ define half @vreduce_ord_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -93,9 +87,8 @@ define float @vreduce_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; 
CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -106,9 +99,8 @@ define float @vreduce_ord_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -119,9 +111,9 @@ define float @vreduce_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -134,9 +126,9 @@ define float @vreduce_ord_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -151,9 +143,8 @@ define float @vreduce_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -164,9 +155,8 @@ define float @vreduce_ord_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -177,9 +167,9 @@ define float @vreduce_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -192,9 +182,9 @@ define float @vreduce_ord_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -209,7 +199,7 @@ define float @vreduce_fadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v10 @@ -222,7 +212,7 @@ define float 
@vreduce_ord_fadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v10 @@ -235,7 +225,7 @@ define float @vreduce_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -250,7 +240,7 @@ define float @vreduce_ord_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -267,9 +257,8 @@ define double @vreduce_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -280,9 +269,8 @@ define double @vreduce_ord_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -293,9 +281,9 @@ define double @vreduce_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_fwadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -308,9 +296,9 @@ define double @vreduce_ord_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -325,7 +313,7 @@ define double @vreduce_fadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v10 @@ -338,7 +326,7 @@ define double @vreduce_ord_fadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v10 @@ -351,7 +339,7 @@ define double @vreduce_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 
vreduce_fwadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 @@ -366,7 +354,7 @@ define double @vreduce_ord_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 @@ -383,7 +371,7 @@ define double @vreduce_fadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: vreduce_fadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v12 @@ -396,7 +384,7 @@ define double @vreduce_ord_fadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v12 @@ -409,7 +397,7 @@ define double @vreduce_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: vreduce_fwadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v10 @@ -424,7 +412,7 @@ define double @vreduce_ord_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v10 @@ -442,10 +430,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI30_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -457,10 +444,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI31_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI31_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -472,10 +458,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI32_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -489,10 +474,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI33_0) -; CHECK-NEXT: addi a0, a0, 
%lo(.LCPI33_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI33_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -506,10 +490,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI34_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -522,13 +505,11 @@ define half @vreduce_fmin_nxv64f16( %v) { ; CHECK-LABEL: vreduce_fmin_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI35_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -542,10 +523,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI36_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI36_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -557,10 +537,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI37_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -572,10 +551,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI38_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI38_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -589,10 +567,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI39_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -606,9 +583,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI40_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v10, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, 
m1, ta, ma +; CHECK-NEXT: vfmv.s.f v10, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -622,13 +599,11 @@ define float @vreduce_fmin_nxv32f32( %v) { ; CHECK-LABEL: vreduce_fmin_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI41_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -642,10 +617,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI42_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI42_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -657,10 +631,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI43_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -672,10 +645,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI44_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI44_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -689,9 +661,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI45_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI45_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v10, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -706,9 +678,9 @@ ; CHECK-LABEL: vreduce_fmin_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v12, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI46_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -722,13 +694,11 @@ define double @vreduce_fmin_nxv16f64( %v) { ; CHECK-LABEL: vreduce_fmin_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI47_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI47_0)(a0) 
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -742,10 +712,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI48_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI48_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -757,10 +726,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI49_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI49_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -772,10 +740,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI50_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI50_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -789,10 +756,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI51_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0) -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI51_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -806,10 +772,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI52_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v9, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI52_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -822,13 +787,11 @@ define half @vreduce_fmax_nxv64f16( %v) { ; CHECK-LABEL: vreduce_fmax_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI53_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero +; CHECK-NEXT: flh ft0, %lo(.LCPI53_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -842,10 +805,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI54_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI54_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, 
v8 ; CHECK-NEXT: ret @@ -857,10 +819,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI55_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI55_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -872,10 +833,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI56_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0) -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI56_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -889,10 +849,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI57_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v9, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI57_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -906,9 +865,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI58_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v10, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI58_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v10, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -922,13 +881,11 @@ define float @vreduce_fmax_nxv32f32( %v) { ; CHECK-LABEL: vreduce_fmax_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI59_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI59_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero +; CHECK-NEXT: flw ft0, %lo(.LCPI59_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -942,10 +899,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI60_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI60_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -957,10 +913,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI61_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI61_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -972,10 +927,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI62_0) 
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v9, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI62_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -989,9 +943,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI63_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI63_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v10, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI63_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v10, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1006,9 +960,9 @@ ; CHECK-LABEL: vreduce_fmax_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI64_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI64_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v12, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI64_0)(a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1022,13 +976,11 @@ define double @vreduce_fmax_nxv16f64( %v) { ; CHECK-LABEL: vreduce_fmax_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI65_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero +; CHECK-NEXT: fld ft0, %lo(.LCPI65_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1039,9 +991,8 @@ define float @vreduce_nsz_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1066,9 +1017,8 @@ ; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1090,7 +1040,7 @@ ; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v10 @@ -1118,7 +1068,7 @@ ; CHECK-NEXT: vslideup.vi v11, v12, 0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v12 @@ -1133,11 +1083,10 @@ define half @vreduce_ord_fadd_nxv12f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv12f16: ; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: fmv.h.x ft0, zero ; CHECK-NEXT: fneg.h ft0, ft0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v11, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredosum.vs v8, v8, v12 @@ -1162,9 +1111,8 @@ ; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1184,7 +1132,7 @@ ; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v10 @@ -1212,7 +1160,7 @@ ; CHECK-NEXT: vslideup.vi v11, v12, 0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v12 @@ -1229,9 +1177,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI74_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vfmv.v.f v11, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -9,9 +9,8 @@ define signext i8 @vreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -24,9 +23,8 @@ define signext i8 @vreduce_umax_nxv1i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -40,9 +38,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -55,9 +52,8 @@ define signext i8 @vreduce_umin_nxv1i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -71,9 +67,8 @@ ; 
CHECK-LABEL: vreduce_smin_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -86,9 +81,8 @@ define signext i8 @vreduce_and_nxv1i8( %v) { ; CHECK-LABEL: vreduce_and_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -101,9 +95,8 @@ define signext i8 @vreduce_or_nxv1i8( %v) { ; CHECK-LABEL: vreduce_or_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -116,9 +109,8 @@ define signext i8 @vreduce_xor_nxv1i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -131,9 +123,8 @@ define signext i8 @vreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -146,9 +137,8 @@ define signext i8 @vreduce_umax_nxv2i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -162,9 +152,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -177,9 +166,8 @@ define signext i8 @vreduce_umin_nxv2i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -193,9 +181,8 @@ ; CHECK-LABEL: vreduce_smin_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -208,9 +195,8 @@ define signext i8 @vreduce_and_nxv2i8( %v) { ; CHECK-LABEL: vreduce_and_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, 
v8 ; CHECK-NEXT: ret @@ -223,9 +209,8 @@ define signext i8 @vreduce_or_nxv2i8( %v) { ; CHECK-LABEL: vreduce_or_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -238,9 +223,8 @@ define signext i8 @vreduce_xor_nxv2i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -253,9 +237,8 @@ define signext i8 @vreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -268,9 +251,8 @@ define signext i8 @vreduce_umax_nxv4i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -284,9 +266,8 @@ ; CHECK-LABEL: vreduce_smax_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -299,9 +280,8 @@ define signext i8 @vreduce_umin_nxv4i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -315,9 +295,8 @@ ; CHECK-LABEL: vreduce_smin_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -330,9 +309,8 @@ define signext i8 @vreduce_and_nxv4i8( %v) { ; CHECK-LABEL: vreduce_and_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -345,9 +323,8 @@ define signext i8 @vreduce_or_nxv4i8( %v) { ; CHECK-LABEL: vreduce_or_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -360,9 +337,8 @@ define signext i8 @vreduce_xor_nxv4i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; 
CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -375,9 +351,8 @@ define signext i16 @vreduce_add_nxv1i16( %v) { ; CHECK-LABEL: vreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -388,9 +363,9 @@ define signext i16 @vwreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -403,9 +378,9 @@ define signext i16 @vwreduce_uadd_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -420,9 +395,8 @@ define signext i16 @vreduce_umax_nxv1i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -436,9 +410,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -451,9 +424,8 @@ define signext i16 @vreduce_umin_nxv1i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -468,9 +440,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -479,9 +450,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -494,9 +464,8 @@ define signext i16 @vreduce_and_nxv1i16( %v) { ; CHECK-LABEL: vreduce_and_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -509,9 +478,8 
@@ define signext i16 @vreduce_or_nxv1i16( %v) { ; CHECK-LABEL: vreduce_or_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -524,9 +492,8 @@ define signext i16 @vreduce_xor_nxv1i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -539,9 +506,8 @@ define signext i16 @vreduce_add_nxv2i16( %v) { ; CHECK-LABEL: vreduce_add_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -552,9 +518,9 @@ define signext i16 @vwreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -567,9 +533,9 @@ define signext i16 @vwreduce_uadd_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -584,9 +550,8 @@ define signext i16 @vreduce_umax_nxv2i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -600,9 +565,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -615,9 +579,8 @@ define signext i16 @vreduce_umin_nxv2i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -632,9 +595,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -643,9 +605,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; 
RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -658,9 +619,8 @@ define signext i16 @vreduce_and_nxv2i16( %v) { ; CHECK-LABEL: vreduce_and_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -673,9 +633,8 @@ define signext i16 @vreduce_or_nxv2i16( %v) { ; CHECK-LABEL: vreduce_or_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -688,9 +647,8 @@ define signext i16 @vreduce_xor_nxv2i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -703,9 +661,8 @@ define signext i16 @vreduce_add_nxv4i16( %v) { ; CHECK-LABEL: vreduce_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -716,9 +673,9 @@ define signext i16 @vwreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -731,9 +688,9 @@ define signext i16 @vwreduce_uadd_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -748,9 +705,8 @@ define signext i16 @vreduce_umax_nxv4i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -764,9 +720,8 @@ ; CHECK-LABEL: vreduce_smax_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -779,9 +734,8 @@ define signext i16 @vreduce_umin_nxv4i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; 
CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -796,9 +750,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -807,9 +760,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -822,9 +774,8 @@ define signext i16 @vreduce_and_nxv4i16( %v) { ; CHECK-LABEL: vreduce_and_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -837,9 +788,8 @@ define signext i16 @vreduce_or_nxv4i16( %v) { ; CHECK-LABEL: vreduce_or_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -852,9 +802,8 @@ define signext i16 @vreduce_xor_nxv4i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -867,9 +816,8 @@ define signext i32 @vreduce_add_nxv1i32( %v) { ; CHECK-LABEL: vreduce_add_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -880,9 +828,9 @@ define signext i32 @vwreduce_add_nxv1i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -895,9 +843,9 @@ define signext i32 @vwreduce_uadd_nxv1i16( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -912,9 +860,8 @@ define signext i32 @vreduce_umax_nxv1i32( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, 
zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -928,9 +875,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -943,9 +889,8 @@ define signext i32 @vreduce_umin_nxv1i32( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -960,9 +905,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -971,9 +915,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -986,9 +929,8 @@ define signext i32 @vreduce_and_nxv1i32( %v) { ; CHECK-LABEL: vreduce_and_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1001,9 +943,8 @@ define signext i32 @vreduce_or_nxv1i32( %v) { ; CHECK-LABEL: vreduce_or_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1016,9 +957,8 @@ define signext i32 @vreduce_xor_nxv1i32( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1031,9 +971,8 @@ define signext i32 @vreduce_add_nxv2i32( %v) { ; CHECK-LABEL: vreduce_add_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1044,9 +983,9 @@ define signext i32 @vwreduce_add_nxv2i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -1059,9 +998,9 @@ define signext i32 @vwreduce_uadd_nxv2i16( %v) { ; CHECK-LABEL: 
vwreduce_uadd_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -1076,9 +1015,8 @@ define signext i32 @vreduce_umax_nxv2i32( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1092,9 +1030,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1107,9 +1044,8 @@ define signext i32 @vreduce_umin_nxv2i32( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1124,9 +1060,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -1135,9 +1070,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1150,9 +1084,8 @@ define signext i32 @vreduce_and_nxv2i32( %v) { ; CHECK-LABEL: vreduce_and_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1165,9 +1098,8 @@ define signext i32 @vreduce_or_nxv2i32( %v) { ; CHECK-LABEL: vreduce_or_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1180,9 +1112,8 @@ define signext i32 @vreduce_xor_nxv2i32( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1195,7 +1126,7 @@ define signext i32 @vreduce_add_nxv4i32( %v) { ; CHECK-LABEL: vreduce_add_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: 
vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredsum.vs v8, v8, v10
@@ -1208,7 +1139,7 @@
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
@@ -1223,7 +1154,7 @@
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
@@ -1240,7 +1171,7 @@
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
@@ -1256,7 +1187,7 @@
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
@@ -1271,7 +1202,7 @@
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredminu.vs v8, v8, v10
@@ -1288,7 +1219,7 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v10
@@ -1299,7 +1230,7 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
@@ -1314,7 +1245,7 @@
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v10
@@ -1329,7 +1260,7 @@
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredor.vs v8, v8, v10
@@ -1344,7 +1275,7 @@
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v10
@@ -1372,9 +1303,8 @@
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredsum.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1399,9 +1329,9 @@
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1428,9 +1358,9 @@
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1458,9 +1388,8 @@
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredmaxu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1495,9 +1424,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1523,9 +1451,8 @@
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1562,9 +1489,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1590,9 +1516,8 @@
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1618,9 +1543,8 @@
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1646,9 +1570,8 @@
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1674,7 +1597,7 @@
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v10
@@ -1701,7 +1624,7 @@
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
@@ -1730,7 +1653,7 @@
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
@@ -1760,7 +1683,7 @@
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v10
@@ -1797,7 +1720,7 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v10
@@ -1825,7 +1748,7 @@
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v10
@@ -1864,7 +1787,7 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
@@ -1892,7 +1815,7 @@
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v10
@@ -1920,7 +1843,7 @@
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v10
@@ -1948,7 +1871,7 @@
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v10
@@ -1976,7 +1899,7 @@
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredsum.vs v8, v8, v12
@@ -2003,7 +1926,7 @@
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
@@ -2032,7 +1955,7 @@
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
@@ -2062,7 +1985,7 @@
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmaxu.vs v8, v8, v12
@@ -2099,7 +2022,7 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v12
@@ -2127,7 +2050,7 @@
;
; RV64-LABEL: vreduce_umin_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredminu.vs v8, v8, v12
@@ -2166,7 +2089,7 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v12
@@ -2194,7 +2117,7 @@
;
; RV64-LABEL: vreduce_and_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredand.vs v8, v8, v12
@@ -2222,7 +2145,7 @@
;
; RV64-LABEL: vreduce_or_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredor.vs v8, v8, v12
@@ -2250,7 +2173,7 @@
;
; RV64-LABEL: vreduce_xor_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v12