diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5720,9 +5720,12 @@
   SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
-  SDValue IdentitySplat =
-      lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT),
-                       M1VT, DL, DAG, Subtarget);
+  SDValue IdentitySplat = lowerScalarSplat(SDValue(), NeutralElem, VL,
+                                           ContainerVT, DL, DAG, Subtarget);
+  if (ElementCount::isKnownLT(M1VT.getVectorElementCount(),
+                              ContainerVT.getVectorElementCount()))
+    IdentitySplat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, IdentitySplat,
+                                DAG.getConstant(0, DL, XLenVT));
   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
                                   IdentitySplat, Mask, VL);
   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
@@ -5782,9 +5785,12 @@
   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
-  SDValue ScalarSplat =
-      lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT),
-                       M1VT, DL, DAG, Subtarget);
+  SDValue ScalarSplat = lowerScalarSplat(SDValue(), ScalarVal, VL, ContainerVT,
+                                         DL, DAG, Subtarget);
+  if (ElementCount::isKnownLT(M1VT.getVectorElementCount(),
+                              ContainerVT.getVectorElementCount()))
+    ScalarSplat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, ScalarSplat,
+                              DAG.getConstant(0, DL, XLenVT));
   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
                                   VectorVal, ScalarSplat, Mask, VL);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
@@ -5850,11 +5856,14 @@
   MVT XLenVT = Subtarget.getXLenVT();
   MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
-  SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0),
-                                        DAG.getConstant(1, DL, XLenVT), M1VT,
-                                        DL, DAG, Subtarget);
-  SDValue Reduction =
-      DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
+  SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0), VL,
+                                        ContainerVT, DL, DAG, Subtarget);
+  if (ElementCount::isKnownLT(M1VT.getVectorElementCount(),
+                              ContainerVT.getVectorElementCount()))
+    StartSplat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, StartSplat,
+                             DAG.getConstant(0, DL, XLenVT));
+  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
+                                  StartSplat, Mask, VL);
   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                              DAG.getConstant(0, DL, XLenVT));
   if (!VecVT.isInteger())
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -9,11 +9,10 @@
 define half @vpreduce_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl)
   ret half %r
@@ -22,11 +21,10 @@
 define half @vpreduce_ord_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
-; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl)
   ret half %r
@@ -37,11 +35,10 @@
 define half @vpreduce_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl)
   ret half %r
@@ -50,11 +47,10 @@
 define half @vpreduce_ord_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
-; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl)
   ret half %r
@@ -65,11 +61,10 @@
 define float @vpreduce_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl)
   ret float %r
@@ -78,11 +73,10 @@
 define float @vpreduce_ord_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
-; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl)
   ret float %r
@@ -93,11 +87,10 @@
 define float @vpreduce_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl)
   ret float %r
@@ -106,11 +99,10 @@
 define float @vpreduce_ord_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl)
   ret float %r
@@ -123,25 +115,23 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT:    li a2, 32
-; CHECK-NEXT:    vslidedown.vi v24, v0, 4
+; CHECK-NEXT:    vslidedown.vi v1, v0, 4
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB8_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:  .LBB8_2:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v25, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
-; CHECK-NEXT:    vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v24, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v24, v0.t
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    addi a1, a0, -32
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    vmv1r.v v0, v1
 ; CHECK-NEXT:    vfredusum.vs v8, v16, v8, v0.t
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -154,25 +144,23 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT:    li a2, 32
-; CHECK-NEXT:    vslidedown.vi v24, v0, 4
+; CHECK-NEXT:    vslidedown.vi v1, v0, 4
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    bltu a0, a2, .LBB9_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:  .LBB9_2:
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v25, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
-; CHECK-NEXT:    vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v24, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v24, v0.t
+; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    addi a1, a0, -32
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, ma
-; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    vmv1r.v v0, v1
 ; CHECK-NEXT:    vfredosum.vs v8, v16, v8, v0.t
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -185,11 +173,10 @@
 define double @vpreduce_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
-; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl)
   ret double %r
@@ -198,11 +185,10 @@
 define double @vpreduce_ord_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
-; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl)
   ret double %r
@@ -213,11 +199,10 @@
 define double @vpreduce_fadd_v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v3f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
-; CHECK-NEXT:    vfredusum.vs v10, v8, v10, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v10, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
   ret double %r
@@ -226,11 +211,10 @@
 define double @vpreduce_ord_fadd_v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v3f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
-; CHECK-NEXT:    vfredosum.vs v10, v8, v10, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v10, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
   ret double %r
@@ -241,11 +225,10 @@
 define double @vpreduce_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
-; CHECK-NEXT:    vfredusum.vs v10, v8, v10, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v10, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call reassoc double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl)
   ret double %r
@@ -254,11 +237,10 @@
 define double @vpreduce_ord_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
-; CHECK-NEXT:    vfredosum.vs v10, v8, v10, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v10, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %r = call double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl)
   ret double %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
@@ -38,9 +38,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <2 x half>, <2 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v)
@@ -52,7 +55,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -68,9 +71,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v)
@@ -82,7 +88,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -98,9 +104,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <8 x half>, <8 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v)
@@ -112,7 +121,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -128,9 +137,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v10, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v10
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <16 x half>, <16 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v16f16(half %s, <16 x half> %v)
@@ -142,7 +154,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -159,11 +171,12 @@
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v12, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v12
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <32 x half>, <32 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
@@ -176,9 +189,7 @@
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -195,11 +206,12 @@
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v16, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <64 x half>, <64 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
@@ -212,9 +224,7 @@
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -234,11 +244,12 @@
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle16.v v16, (a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v16
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v16, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.h fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
   %red = call reassoc half @llvm.vector.reduce.fadd.v128f16(half %s, <128 x half> %v)
@@ -248,20 +259,16 @@
 define half @vreduce_ord_fadd_v128f16(<128 x half>* %x, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v128f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    li a2, 64
-; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v24, fa0
-; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT:    vfredosum.vs v16, v16, v24
-; CHECK-NEXT:    vfmv.f.s ft0, v16
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, ft0
-; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-NEXT:    vfredosum.vs v8, v8, v16
+; CHECK-NEXT:    vfmv.v.f v24, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v24
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    vfredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
@@ -317,11 +324,10 @@
 define float @vreduce_ord_fwadd_v1f32(<1 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v1f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -339,9 +345,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <2 x float>, <2 x float>* %x
   %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v)
@@ -353,7 +362,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -365,14 +374,16 @@
 define float @vreduce_fwadd_v2f32(<2 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_fwadd_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <2 x half>, <2 x half>* %x
   %e = fpext <2 x half> %v to <2 x float>
@@ -383,11 +394,10 @@
 define float @vreduce_ord_fwadd_v2f32(<2 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -405,9 +415,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <4 x float>, <4 x float>* %x
   %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v)
@@ -419,7 +432,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -433,11 +446,14 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <4 x half>, <4 x half>* %x
   %e = fpext <4 x half> %v to <4 x float>
@@ -450,7 +466,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -469,9 +485,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v10, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v10
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <8 x float>, <8 x float>* %x
   %red = call reassoc float @llvm.vector.reduce.fadd.v8f32(float %s, <8 x float> %v)
@@ -483,7 +502,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -495,14 +514,16 @@
 define float @vreduce_fwadd_v8f32(<8 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_fwadd_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v10, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <8 x half>, <8 x half>* %x
   %e = fpext <8 x half> %v to <8 x float>
@@ -513,12 +534,11 @@
 define float @vreduce_ord_fwadd_v8f32(<8 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -535,9 +555,12 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v12, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v12
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <16 x float>, <16 x float>* %x
   %red = call reassoc float @llvm.vector.reduce.fadd.v16f32(float %s, <16 x float> %v)
@@ -549,7 +572,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -561,14 +584,16 @@
 define float @vreduce_fwadd_v16f32(<16 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_fwadd_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v12, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <16 x half>, <16 x half>* %x
   %e = fpext <16 x half> %v to <16 x float>
@@ -579,12 +604,11 @@
 define float @vreduce_ord_fwadd_v16f32(<16 x half>* %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
+; CHECK-NEXT:    vfmv.v.f v12, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -602,11 +626,12 @@
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v16, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <32 x float>, <32 x float>* %x
   %red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
@@ -619,9 +644,7 @@
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -634,14 +657,16 @@
 ; CHECK-LABEL: vreduce_fwadd_v32f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v16, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfwredusum.vs v8, v8, v16
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <32 x half>, <32 x half>* %x
   %e = fpext <32 x half> %v to <32 x float>
@@ -653,12 +678,11 @@
 ; CHECK-LABEL: vreduce_ord_fwadd_v32f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -679,11 +703,12 @@
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a0)
 ; CHECK-NEXT:    vfadd.vv v8, v8, v16
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vfmv.v.f v16, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v8, v16
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <64 x float>, <64 x float>* %x
   %red = call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v)
@@ -693,20 +718,16 @@
 define float @vreduce_ord_fadd_v64f32(<64 x float>* %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v64f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    li a2, 32
-; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v24, fa0
-; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vfredosum.vs v16, v16, v24
-; CHECK-NEXT:    vfmv.f.s ft0, v16
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, ft0
-; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT:    vfredosum.vs v8, v8, v16
+; CHECK-NEXT:    vfmv.v.f v24, fa0
+; CHECK-NEXT:    vfredosum.vs v8, v8, v24
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    vfredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <64 x float>, <64 x float>* %x
@@ -725,11 +746,13 @@
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwadd.vv v24, v8, v16
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    fmv.w.x ft0, zero
+; CHECK-NEXT:    fneg.s ft0, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, ft0
 ; CHECK-NEXT:    vfredusum.vs v8, v24, v8
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fadd.s fa0, fa0, ft0
 ; CHECK-NEXT:    ret
   %v = load <64 x half>, <64 x half>* %x
   %e = fpext <64 x half> %v to <64 x float>
@@ -746,15 +769,15 @@
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v16, a0
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v24, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v24, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v16, v16, v24
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s ft0, v16
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, ft0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v16, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -830,14 +853,29 @@
 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
 
 define double @vreduce_fadd_v2f64(<2 x double>* %x, double %s) {
-; CHECK-LABEL: vreduce_fadd_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vfredusum.vs v8, v8, v9
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fadd_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v9, ft0
+; RV32-NEXT:    vfredusum.vs v8, v8, v9
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fadd_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v9, ft0
+; RV64-NEXT:    vfredusum.vs v8, v8, v9
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <2 x double>, <2 x double>* %x
   %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v)
   ret double %red
@@ -848,7 +886,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -858,16 +896,33 @@
 }
 
 define double @vreduce_fwadd_v2f64(<2 x float>* %x, double %s) {
-; CHECK-LABEL: vreduce_fwadd_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fwadd_v2f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v9, ft0
+; RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-NEXT:    vfwredusum.vs v8, v8, v9
+; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fwadd_v2f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v9, ft0
+; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT:    vfwredusum.vs v8, v8, v9
+; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <2 x float>, <2 x float>* %x
   %e = fpext <2 x float> %v to <2 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %e)
@@ -879,7 +934,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
@@ -894,14 +949,29 @@
 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
 
 define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) {
-; CHECK-LABEL: vreduce_fadd_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vfredusum.vs v8, v8, v10
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fadd_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v10, ft0
+; RV32-NEXT:    vfredusum.vs v8, v8, v10
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fadd_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v10, ft0
+; RV64-NEXT:    vfredusum.vs v8, v8, v10
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <4 x double>, <4 x double>* %x
   %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %v)
   ret double %red
@@ -912,7 +982,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -922,17 +992,33 @@
 }
 
 define double @vreduce_fwadd_v4f64(<4 x float>* %x, double %s) {
-; CHECK-LABEL: vreduce_fwadd_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
-; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fwadd_v4f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v10, ft0
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT:    vfwredusum.vs v8, v8, v10
+; RV32-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fwadd_v4f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v10, ft0
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT:    vfwredusum.vs v8, v8, v10
+; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <4 x float>, <4 x float>* %x
   %e = fpext <4 x float> %v to <4 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.v4f64(double %s, <4 x double> %e)
@@ -942,12 +1028,11 @@
 define double @vreduce_ord_fwadd_v4f64(<4 x float>* %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -960,14 +1045,29 @@
 declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
 
 define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) {
-; CHECK-LABEL: vreduce_fadd_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vfredusum.vs v8, v8, v12
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fadd_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v12, ft0
+; RV32-NEXT:    vfredusum.vs v8, v8, v12
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fadd_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v12, ft0
+; RV64-NEXT:    vfredusum.vs v8, v8, v12
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <8 x double>, <8 x double>* %x
   %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %v)
   ret double %red
@@ -978,7 +1078,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v12, fa0
+; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -988,17 +1088,33 @@
 }
 
 define double @vreduce_fwadd_v8f64(<8 x float>* %x, double %s) {
-; CHECK-LABEL: vreduce_fwadd_v8f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
-; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fwadd_v8f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v12, ft0
+; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT:    vfwredusum.vs v8, v8, v12
+; RV32-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fwadd_v8f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v12, ft0
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT:    vfwredusum.vs v8, v8, v12
+; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <8 x float>, <8 x float>* %x
   %e = fpext <8 x float> %v to <8 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.v8f64(double %s, <8 x double> %e)
@@ -1008,12 +1124,11 @@
 define double @vreduce_ord_fwadd_v8f64(<8 x float>* %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
+; CHECK-NEXT:    vfmv.v.f v12, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1026,14 +1141,29 @@
 declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
 
 define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) {
-; CHECK-LABEL: vreduce_fadd_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v16, fa0
-; CHECK-NEXT:    vfredusum.vs v8, v8, v16
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fadd_v16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v16, ft0
+; RV32-NEXT:    vfredusum.vs v8, v8, v16
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fadd_v16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v16, ft0
+; RV64-NEXT:    vfredusum.vs v8, v8, v16
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <16 x double>, <16 x double>* %x
   %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %v)
   ret double %red
@@ -1044,7 +1174,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1054,17 +1184,33 @@
 }
 
 define double @vreduce_fwadd_v16f64(<16 x float>* %x, double %s) {
-; CHECK-LABEL: vreduce_fwadd_v16f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
-; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fwadd_v16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vle32.v v8, (a0)
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v16, ft0
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vfwredusum.vs v8, v8, v16
+; RV32-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fwadd_v16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vle32.v v8, (a0)
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v16, ft0
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT:    vfwredusum.vs v8, v8, v16
+; RV64-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <16 x float>, <16 x float>* %x
   %e = fpext <16 x float> %v to <16 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.v16f64(double %s, <16 x double> %e)
@@ -1074,12 +1220,11 @@
 define double @vreduce_ord_fwadd_v16f64(<16 x float>* %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1092,17 +1237,35 @@
 declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)
 
 define double @vreduce_fadd_v32f64(<32 x double>* %x, double %s) {
-; CHECK-LABEL: vreduce_fadd_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vfmv.s.f v24, fa0
-; CHECK-NEXT:    vfadd.vv v8, v8, v16
-; CHECK-NEXT:    vfredusum.vs v8, v8, v24
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; RV32-LABEL: vreduce_fadd_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    addi a0, a0, 128
+; RV32-NEXT:    vle64.v v16, (a0)
+; RV32-NEXT:    vfadd.vv v8, v8, v16
+; RV32-NEXT:    fcvt.d.w ft0, zero
+; RV32-NEXT:    fneg.d ft0, ft0
+; RV32-NEXT:    vfmv.v.f v16, ft0
+; RV32-NEXT:    vfredusum.vs v8, v8, v16
+; RV32-NEXT:    vfmv.f.s ft0, v8
+; RV32-NEXT:    fadd.d fa0, fa0, ft0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vreduce_fadd_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    vle64.v v16, (a0)
+; RV64-NEXT:    vfadd.vv v8, v8, v16
+; RV64-NEXT:    fmv.d.x ft0, zero
+; RV64-NEXT:    fneg.d ft0, ft0
+; RV64-NEXT:    vfmv.v.f v16, ft0
+; RV64-NEXT:    vfredusum.vs v8, v8, v16
+; RV64-NEXT:    vfmv.f.s ft0, v8
+; RV64-NEXT:    fadd.d fa0, fa0, ft0
+; RV64-NEXT:    ret
   %v = load <32 x double>, <32 x double>* %x
   %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v)
   ret double %red
@@ -1115,10 +1278,10 @@
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vfmv.s.f v24, fa0
+; CHECK-NEXT:    vfmv.v.f v24, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v24
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vfmv.v.f v8, ft0
 ; CHECK-NEXT:    vfredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1131,17 +1294,16 @@
 ; RV32-LABEL: vreduce_fwadd_v32f64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
 ; RV32-NEXT:    vslidedown.vi v16, v8, 16
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
+; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vfwadd.vv v24, v8, v16
 ; RV32-NEXT:    fcvt.d.w ft0, zero
 ; RV32-NEXT:    fneg.d ft0, ft0
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-NEXT:    vfmv.s.f v8, ft0
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT:    vfmv.v.f v8, ft0
 ; RV32-NEXT:    vfredusum.vs v8, v24, v8
 ; RV32-NEXT:    vfmv.f.s ft0, v8
 ; RV32-NEXT:    fadd.d fa0, fa0, ft0
@@ -1150,36 +1312,20 @@
 ; RV64-LABEL: vreduce_fwadd_v32f64:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
 ; RV64-NEXT:    vslidedown.vi v16, v8, 16
-; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
+; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT:    vfwadd.vv v24, v8, v16
 ; RV64-NEXT:    fmv.d.x ft0, zero
 ; RV64-NEXT:    fneg.d ft0, ft0
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-NEXT:    vfmv.s.f v8, ft0
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT:    vfmv.v.f v8, ft0
 ; RV64-NEXT:    vfredusum.vs v8, v24, v8
 ; RV64-NEXT:    vfmv.f.s ft0, v8
 ; RV64-NEXT:    fadd.d fa0, fa0, ft0
 ; RV64-NEXT:    ret
-; CHECK-LABEL: vreduce_fwadd_v32f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v16, v8, 16
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vfwadd.vv v24, v8, v16
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vfredusum.vs v8, v24, v8
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
   %v = load <32 x float>, <32 x float>* %x
   %e = fpext <32 x float> %v to <32 x double>
   %red = call reassoc double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %e)
@@ -1194,15 +1340,15 @@
 ; CHECK-NEXT:    vle32.v v16, (a0)
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v16, 16
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v24, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v24, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v16, v16, v24
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s ft0, v16
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, ft0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v16, ft0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -1222,9 +1368,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI68_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI68_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1242,9 +1386,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI69_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI69_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1260,9 +1402,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI70_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI70_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1278,9 +1418,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI71_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI71_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1299,13 +1437,11 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    lui a0, %hi(.LCPI72_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI72_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vlse16.v v16, (a0), zero
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vfredmin.vs v8, v8, v16
+; CHECK-NEXT:    vlse16.v v24, (a0), zero
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vfredmin.vs v8, v8, v24
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
@@ -1322,9 +1458,7 @@
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI73_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI73_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1342,9 +1476,7 @@
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI74_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI74_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1360,9 +1492,7 @@
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI75_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI75_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1378,9 +1508,7 @@
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI76_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI76_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1394,25 +1522,36 @@
 define float @vreduce_fmin_v128f32(<128 x float>* %x) {
 ; CHECK-LABEL: vreduce_fmin_v128f32:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a2, a0, 384
-; CHECK-NEXT:    vle32.v v16, (a2)
-; CHECK-NEXT:    addi a2, a0, 256
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, a0, 256
+; CHECK-NEXT:    vle32.v v16, (a1)
+; CHECK-NEXT:    addi a1, a0, 384
+; CHECK-NEXT:    vle32.v v24, (a1)
 ; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    vle32.v v0, (a2)
-; CHECK-NEXT:    vfmin.vv v16, v24, v16
-; CHECK-NEXT:    vfmin.vv v8, v8, v0
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vle32.v v0, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI77_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI77_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vlse32.v v16, (a0), zero
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vfredmin.vs v8, v8, v16
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vlse32.v v8, (a0), zero
+; CHECK-NEXT:    vfmin.vv v24, v0, v24
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v16, v0, v16
+; CHECK-NEXT:    vfmin.vv v16, v16, v24
+; CHECK-NEXT:    vfredmin.vs v8, v16, v8
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = load <128 x float>, <128 x float>* %x
   %red = call float @llvm.vector.reduce.fmin.v128f32(<128 x float> %v)
@@ -1428,9 +1567,7 @@
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI78_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI78_0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1448,9 +1585,7 @@
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI79_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI79_0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1466,9 +1601,7 @@
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI80_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI80_0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1484,9 +1617,7 @@
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI81_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI81_0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfredmin.vs v8, v8, v10
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1504,13 +1635,11 @@
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
 ; CHECK-NEXT:    lui a0, %hi(.LCPI82_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI82_0)
-; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vlse64.v v16, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vfredmin.vs v8, v8, v16
+; CHECK-NEXT:    vlse64.v v24, (a0), zero
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vfredmin.vs v8, v8, v24
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <32 x double>, <32 x double>* %x
@@ -1527,9 +1656,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI83_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI83_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vfredmax.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1547,9 +1674,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI84_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI84_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfredmax.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1565,9 +1690,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI85_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI85_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfredmax.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1583,9 +1706,7 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI86_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI86_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vlse16.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vfredmax.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1604,13 +1725,11 @@
 ; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    addi a0, a0, 128
 ; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vfmax.vv v8, v8, v16
 ; CHECK-NEXT:    lui a0, %hi(.LCPI87_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI87_0)
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vlse16.v v16, (a0), zero
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vfredmax.vs v8, v8, v16
+; CHECK-NEXT:    vlse16.v v24, (a0), zero
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vfredmax.vs v8, v8, v24
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %v = load <128 x half>, <128 x half>* %x
@@ -1627,9 +1746,7 @@
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI88_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI88_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v9, (a0), zero
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vfredmax.vs v8, v8, v9
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1647,9 +1764,7 @@
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.LCPI89_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI89_0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1,
ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1665,9 +1780,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI90_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI90_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1683,9 +1796,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI91_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI91_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1699,25 +1810,36 @@ define float @vreduce_fmax_v128f32(<128 x float>* %x) { ; CHECK-LABEL: vreduce_fmax_v128f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a2, a0, 384 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 256 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle32.v v24, (a1) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a2) -; CHECK-NEXT: vfmax.vv v16, v24, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vle32.v v0, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI92_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI92_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vlse32.v v8, (a0), zero +; CHECK-NEXT: vfmax.vv v24, v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v0, v16 +; CHECK-NEXT: vfmax.vv v16, v16, v24 +; CHECK-NEXT: vfredmax.vs v8, v16, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = load <128 x float>, <128 x float>* %x %red = call float @llvm.vector.reduce.fmax.v128f32(<128 x float> %v) @@ -1733,9 +1855,7 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI93_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI93_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1753,9 +1873,7 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI94_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI94_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1771,9 +1889,7 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI95_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI95_0) -; CHECK-NEXT: 
vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1789,9 +1905,7 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI96_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI96_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1809,13 +1923,11 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI97_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI97_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vlse64.v v24, (a0), zero +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <32 x double>, <32 x double>* %x @@ -1828,9 +1940,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -9,11 +9,10 @@ define signext i8 @vpreduce_add_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -24,12 +23,10 @@ define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -40,11 +37,10 @@ define signext i8 @vpreduce_smax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, 
v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -55,12 +51,10 @@ define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -71,11 +65,10 @@ define signext i8 @vpreduce_smin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -86,11 +79,10 @@ define signext i8 @vpreduce_and_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -101,11 +93,10 @@ define signext i8 @vpreduce_or_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -116,11 +107,10 @@ define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -131,12 +121,10 @@ define signext i8 @vpreduce_umin_v3i8(i8 
signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v3i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl) ret i8 %r @@ -147,11 +135,10 @@ define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -162,12 +149,10 @@ define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -178,11 +163,10 @@ define signext i8 @vpreduce_smax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -193,12 +177,10 @@ define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -209,11 +191,10 @@ define signext i8 @vpreduce_smin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, 
v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -224,11 +205,10 @@ define signext i8 @vpreduce_and_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -239,11 +219,10 @@ define signext i8 @vpreduce_or_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -254,11 +233,10 @@ define signext i8 @vpreduce_xor_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -269,11 +247,10 @@ define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -282,27 +259,13 @@ declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32) define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, 
a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r } @@ -312,11 +275,10 @@ define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -325,27 +287,13 @@ declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32) define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r } @@ -355,11 +303,10 @@ define signext i16 @vpreduce_smin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -370,11 +317,10 @@ define signext i16 @vpreduce_and_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.v2i16(i16 %s, <2 
x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -385,11 +331,10 @@ define signext i16 @vpreduce_or_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -400,11 +345,10 @@ define signext i16 @vpreduce_xor_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -415,11 +359,10 @@ define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -428,27 +371,13 @@ declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32) define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r } @@ -458,11 +387,10 @@ define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; 
CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -471,27 +399,13 @@ declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32) define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r } @@ -501,11 +415,10 @@ define signext i16 @vpreduce_smin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -516,11 +429,10 @@ define signext i16 @vpreduce_and_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -531,11 +443,10 @@ define signext i16 @vpreduce_or_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -546,11 +457,10 @@ define signext i16 @vpreduce_xor_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v4i16: ; CHECK: 
# %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -561,11 +471,10 @@ define signext i32 @vpreduce_add_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -574,25 +483,13 @@ declare i32 @llvm.vp.reduce.umax.v2i32(i32, <2 x i32>, <2 x i1>, i32) define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r } @@ -602,11 +499,10 @@ define signext i32 @vpreduce_smax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -615,25 +511,13 @@ declare i32 @llvm.vp.reduce.umin.v2i32(i32, <2 x i32>, <2 x i1>, i32) define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, 
a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r } @@ -643,11 +527,10 @@ define signext i32 @vpreduce_smin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -658,11 +541,10 @@ define signext i32 @vpreduce_and_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -673,11 +555,10 @@ define signext i32 @vpreduce_or_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -688,11 +569,10 @@ define signext i32 @vpreduce_xor_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -703,11 +583,10 @@ define signext i32 @vpreduce_add_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; 
CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -716,25 +595,13 @@ declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32) define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r } @@ -744,11 +611,10 @@ define signext i32 @vpreduce_smax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -757,25 +623,13 @@ declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32) define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r } @@ -785,11 +639,10 @@ define signext i32 @vpreduce_smin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: 
vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -800,11 +653,10 @@ define signext i32 @vpreduce_and_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -815,11 +667,10 @@ define signext i32 @vpreduce_or_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -830,11 +681,10 @@ define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -847,25 +697,23 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: vslidedown.vi v1, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB49_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB49_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, ma -; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t -; CHECK-NEXT: vmv.x.s a0, v25 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: addi a0, a1, -32 -; CHECK-NEXT: sltu a1, a1, a0 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v24, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v24, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: addi a2, a1, -32 +; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vredxor.vs v8, v16, v8, v0.t ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -883,25 +731,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; 
RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredsum.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_add_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredsum.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -917,25 +763,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -951,25 +795,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmax.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smax_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmax.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -985,25 +827,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: 
vmv.x.s a0, v9
+; RV32-NEXT: vredminu.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_umin_v2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredminu.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.umin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1019,25 +859,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredmin.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_smin_v2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredmin.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.smin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1053,25 +891,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredand.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredand.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_and_v2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredand.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredand.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.and.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1087,25 +923,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredor.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredor.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_or_v2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredor.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredor.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.or.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1121,25 +955,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredxor.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_xor_v2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredxor.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.xor.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1155,25 +987,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredsum.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_add_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredsum.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.add.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1189,25 +1019,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredmaxu.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_umax_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredmaxu.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.umax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1223,25 +1051,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredmax.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_smax_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredmax.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.smax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1257,25 +1083,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredminu.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_umin_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredminu.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.umin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1291,25 +1115,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredmin.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_smin_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredmin.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.smin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1325,25 +1147,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredand.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredand.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_and_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredand.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredand.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.and.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1359,25 +1179,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredor.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredor.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_or_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredor.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredor.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.or.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1393,25 +1211,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
-; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: vredxor.vs v8, v8, v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v10, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_xor_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
-; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vredxor.vs v8, v8, v10, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.xor.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
 ret i64 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -23,7 +23,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -39,7 +39,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -55,7 +55,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -71,7 +71,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -88,9 +88,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -107,9 +105,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -126,9 +122,7 @@
 ; CHECK-NEXT: li a1, 128
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -148,9 +142,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle8.v v16, (a0)
 ; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -208,7 +200,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -220,11 +212,10 @@
 define i16 @vwreduce_add_v2i16(<2 x i8>* %x) {
 ; CHECK-LABEL: vwreduce_add_v2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -238,11 +229,10 @@
 define i16 @vwreduce_uadd_v2i16(<2 x i8>* %x) {
 ; CHECK-LABEL: vwreduce_uadd_v2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -260,7 +250,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -272,11 +262,10 @@
 define i16 @vwreduce_add_v4i16(<4 x i8>* %x) {
 ; CHECK-LABEL: vwreduce_add_v4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -290,11 +279,10 @@
 define i16 @vwreduce_uadd_v4i16(<4 x i8>* %x) {
 ; CHECK-LABEL: vwreduce_uadd_v4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -312,7 +300,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -326,7 +314,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
@@ -343,7 +331,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
@@ -362,7 +350,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -374,12 +362,11 @@
 define i16 @vwreduce_add_v16i16(<16 x i8>* %x) {
 ; CHECK-LABEL: vwreduce_add_v16i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v9
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -392,12 +379,11 @@
 define i16 @vwreduce_uadd_v16i16(<16 x i8>* %x) {
 ; CHECK-LABEL: vwreduce_uadd_v16i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v9
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -415,9 +401,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -430,12 +414,11 @@
 ; CHECK-LABEL: vwreduce_add_v32i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v10
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -449,12 +432,11 @@
 ; CHECK-LABEL: vwreduce_uadd_v32i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v10
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -472,9 +454,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -487,12 +467,11 @@
 ; CHECK-LABEL: vwreduce_add_v64i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v16
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -506,12 +485,11 @@
 ; CHECK-LABEL: vwreduce_uadd_v64i16:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v12
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v16
 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -532,9 +510,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle16.v v16, (a0)
 ; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -554,9 +530,8 @@
 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vredsum.vs v8, v24, v8
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -577,9 +552,8 @@
 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vredsum.vs v8, v24, v8
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -638,7 +612,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -650,11 +624,10 @@
 define i32 @vwreduce_add_v2i32(<2 x i16>* %x) {
 ; CHECK-LABEL: vwreduce_add_v2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -668,11 +641,10 @@
 define i32 @vwreduce_uadd_v2i32(<2 x i16>* %x) {
 ; CHECK-LABEL: vwreduce_uadd_v2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
@@ -690,7 +662,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -704,7 +676,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -721,7 +693,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -740,7 +712,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -752,12 +724,11 @@
 define i32 @vwreduce_add_v8i32(<8 x i16>* %x) {
 ; CHECK-LABEL: vwreduce_add_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v9
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -770,12 +741,11 @@
 define i32 @vwreduce_uadd_v8i32(<8 x i16>* %x) {
 ; CHECK-LABEL: vwreduce_uadd_v8i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v9
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -792,7 +762,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -804,12 +774,11 @@
 define i32 @vwreduce_add_v16i32(<16 x i16>* %x) {
 ; CHECK-LABEL: vwreduce_add_v16i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v10
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -822,12 +791,11 @@
 define i32 @vwreduce_uadd_v16i32(<16 x i16>* %x) {
 ; CHECK-LABEL: vwreduce_uadd_v16i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v10
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -845,9 +813,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -860,12 +826,11 @@
 ; CHECK-LABEL: vwreduce_add_v32i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v16
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -879,12 +844,11 @@
 ; CHECK-LABEL: vwreduce_uadd_v32i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v12
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v16
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -905,9 +869,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle32.v v16, (a0)
 ; CHECK-NEXT: vadd.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredsum.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -927,9 +889,8 @@
 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vredsum.vs v8, v24, v8
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -950,9 +911,8 @@
 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vredsum.vs v8, v24, v8
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1043,7 +1003,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v9, zero
+; RV32-NEXT: vmv.v.i v9, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v9
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1056,7 +1016,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v9, zero
+; RV64-NEXT: vmv.v.i v9, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v9
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1070,7 +1030,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vmv.s.x v9, zero
+; RV32-NEXT: vmv.v.i v9, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-NEXT: vwredsum.vs v8, v8, v9
 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
@@ -1085,7 +1045,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vmv.s.x v9, zero
+; RV64-NEXT: vmv.v.i v9, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; RV64-NEXT: vwredsum.vs v8, v8, v9
 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
@@ -1102,7 +1062,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vmv.s.x v9, zero
+; RV32-NEXT: vmv.v.i v9, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32-NEXT: vwredsumu.vs v8, v8, v9
 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
@@ -1117,7 +1077,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vmv.s.x v9, zero
+; RV64-NEXT: vmv.v.i v9, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; RV64-NEXT: vwredsumu.vs v8, v8, v9
 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
@@ -1136,7 +1096,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v10, zero
+; RV32-NEXT: vmv.v.i v10, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v10
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1149,7 +1109,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v10, zero
+; RV64-NEXT: vmv.v.i v10, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1161,12 +1121,11 @@
 define i64 @vwreduce_add_v4i64(<4 x i32>* %x) {
 ; RV32-LABEL: vwreduce_add_v4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vwredsum.vs v8, v8, v9
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vwredsum.vs v8, v8, v10
 ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1177,12 +1136,11 @@
 ;
 ; RV64-LABEL: vwreduce_add_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vwredsum.vs v8, v8, v9
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v10
 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1195,12 +1153,11 @@
 define i64 @vwreduce_uadd_v4i64(<4 x i32>* %x) {
 ; RV32-LABEL: vwreduce_uadd_v4i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vwredsumu.vs v8, v8, v9
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vwredsumu.vs v8, v8, v10
 ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1211,12 +1168,11 @@
 ;
 ; RV64-LABEL: vwreduce_uadd_v4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vwredsumu.vs v8, v8, v9
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vwredsumu.vs v8, v8, v10
 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1233,7 +1189,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v12, zero
+; RV32-NEXT: vmv.v.i v12, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v12
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1246,7 +1202,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v12, zero
+; RV64-NEXT: vmv.v.i v12, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1258,12 +1214,11 @@
 define i64 @vwreduce_add_v8i64(<8 x i32>* %x) {
 ; RV32-LABEL: vwreduce_add_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vwredsum.vs v8, v8, v10
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vwredsum.vs v8, v8, v12
 ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1274,12 +1229,11 @@
 ;
 ; RV64-LABEL: vwreduce_add_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vwredsum.vs v8, v8, v10
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v12
 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1292,12 +1246,11 @@
 define i64 @vwreduce_uadd_v8i64(<8 x i32>* %x) {
 ; RV32-LABEL: vwreduce_uadd_v8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vwredsumu.vs v8, v8, v10
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vwredsumu.vs v8, v8, v12
 ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1308,12 +1261,11 @@
 ;
 ; RV64-LABEL: vwreduce_uadd_v8i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vwredsumu.vs v8, v8, v10
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vwredsumu.vs v8, v8, v12
 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1330,7 +1282,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1343,7 +1295,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v16, zero
+; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1355,12 +1307,11 @@
 define i64 @vwreduce_add_v16i64(<16 x i32>* %x) {
 ; RV32-LABEL: vwreduce_add_v16i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, zero
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vwredsum.vs v8, v8, v12
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vwredsum.vs v8, v8, v16
 ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1371,12 +1322,11 @@
 ;
 ; RV64-LABEL: vwreduce_add_v16i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vwredsum.vs v8, v8, v12
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v16
 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1389,12 +1339,11 @@
 define i64 @vwreduce_uadd_v16i64(<16 x i32>* %x) {
 ; RV32-LABEL: vwreduce_uadd_v16i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, zero
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vwredsumu.vs v8, v8, v12
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vwredsumu.vs v8, v8, v16
 ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1405,12 +1354,11 @@
 ;
 ; RV64-LABEL: vwreduce_uadd_v16i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV64-NEXT: vwredsumu.vs v8, v8, v12
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT: vwredsumu.vs v8, v8, v16
 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1429,9 +1377,9 @@
 ; RV32-NEXT: vle64.v v8, (a0)
 ; RV32-NEXT: addi a0, a0, 128
 ; RV32-NEXT: vle64.v v16, (a0)
-; RV32-NEXT: vmv.s.x v24, zero
 ; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: vredsum.vs v8, v8, v24
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1445,9 +1393,9 @@
 ; RV64-NEXT: vle64.v v8, (a0)
 ; RV64-NEXT: addi a0, a0, 128
 ; RV64-NEXT: vle64.v v16, (a0)
-; RV64-NEXT: vmv.s.x v24, zero
 ; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: vredsum.vs v8, v8, v24
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %v = load <32 x i64>, <32 x i64>* %x
@@ -1465,9 +1413,8 @@
 ; RV32-NEXT: vslidedown.vi v16, v8, 16
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT: vwadd.vv v24, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, zero
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
 ; RV32-NEXT: vredsum.vs v8, v24, v8
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1484,9 +1431,8 @@
 ; RV64-NEXT: vslidedown.vi v16, v8, 16
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT: vwadd.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, zero
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
 ; RV64-NEXT: vredsum.vs v8, v24, v8
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1506,9 +1452,8 @@
 ; RV32-NEXT: vslidedown.vi v16, v8, 16
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT: vwaddu.vv v24, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, zero
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT: vmv.v.i v8, 0
 ; RV32-NEXT: vredsum.vs v8, v24, v8
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1525,9 +1470,8 @@
 ; RV64-NEXT: vslidedown.vi v16, v8, 16
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT: vwaddu.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, zero
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vmv.v.i v8, 0
 ; RV64-NEXT: vredsum.vs v8, v24, v8
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1553,7 +1497,7 @@
 ; RV32-NEXT: vadd.vv v16, v24, v16
 ; RV32-NEXT: vadd.vv v8, v8, v0
 ; RV32-NEXT: vadd.vv v8, v8, v16
-; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -1575,7 +1519,7 @@
 ; RV64-NEXT: vadd.vv v16, v24, v16
 ; RV64-NEXT: vadd.vv v8, v8, v0
 ; RV64-NEXT: vadd.vv v8, v8, v16
-; RV64-NEXT: vmv.s.x v16, zero
+; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1641,7 +1585,7 @@
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vadd.vv v8, v0, v8
-; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1709,7 +1653,7 @@
 ; RV64-NEXT: addi a0, a0, 16
 ; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vadd.vv v8, v0, v8
-; RV64-NEXT: vmv.s.x v16, zero
+; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: csrr a1, vlenb
@@ -1780,7 +1724,7 @@
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vadd.vv v8, v0, v8
-; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1848,7 +1792,7 @@
 ; RV64-NEXT: addi a0, a0, 16
 ; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vadd.vv v8, v0, v8
-; RV64-NEXT: vmv.s.x v16, zero
+; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vredsum.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: csrr a1, vlenb
@@ -1883,9 +1827,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1901,9 +1843,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1919,9 +1859,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1937,9 +1875,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1956,9 +1892,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v10, -1
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1975,9 +1909,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v12, -1
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1994,9 +1926,7 @@
 ; CHECK-NEXT: li a1, 128
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v16, -1
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2016,9 +1946,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle8.v v16, (a0)
 ; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v16, -1
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2048,9 +1976,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2066,9 +1992,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2084,9 +2008,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2102,9 +2024,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v10, -1
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2121,9 +2041,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v12, -1
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2140,9 +2058,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v16, -1
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2162,9 +2078,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle16.v v16, (a0)
 ; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v16, -1
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2194,9 +2108,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2212,9 +2124,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v9, -1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2230,9 +2140,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v10, -1
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2248,9 +2156,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v12, -1
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2267,9 +2173,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v16, -1
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2289,9 +2193,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle32.v v16, (a0)
 ; CHECK-NEXT: vand.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v16, -1
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vredand.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2331,9 +2233,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv.v.i v9, -1
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v9
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -2346,9 +2246,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v9, -1
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vredand.vs v8, v8, v9
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2364,9 +2262,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv.v.i v10, -1
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v10
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -2379,9 +2275,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v10, -1
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT: vredand.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2397,9 +2291,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv.v.i v12, -1
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v12
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -2412,9 +2304,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v12, -1
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT: vredand.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2430,9 +2320,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv.v.i v16, -1
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -2445,9 +2333,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v16, -1
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vredand.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2466,9 +2352,7 @@
 ; RV32-NEXT: addi a0, a0, 128
 ; RV32-NEXT: vle64.v v16, (a0)
 ; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv.v.i v16, -1
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -2484,9 +2368,7 @@
 ; RV64-NEXT: addi a0, a0, 128
 ; RV64-NEXT: vle64.v v16, (a0)
 ; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v16, -1
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vredand.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2511,9 +2393,7 @@
 ; RV32-NEXT: vand.vv v16, v24, v16
 ; RV32-NEXT: vand.vv v8, v8, v0
 ; RV32-NEXT: vand.vv v8, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT: vmv.v.i v16, -1
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vredand.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -2535,9 +2415,7 @@
 ; RV64-NEXT: vand.vv v16, v24, v16
 ; RV64-NEXT: vand.vv v8, v8, v0
 ; RV64-NEXT: vand.vv v8, v8, v16
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v16, -1
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vredand.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2567,7 +2445,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2583,7 +2461,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2599,7 +2477,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2615,7 +2493,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2632,9 +2510,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2651,9 +2527,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2670,9 +2544,7 @@
 ; CHECK-NEXT: li a1, 128
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2692,9 +2564,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle8.v v16, (a0)
 ; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2724,7 +2594,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2740,7 +2610,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2756,7 +2626,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2772,7 +2642,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2789,9 +2659,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2808,9 +2676,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2830,9 +2696,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle16.v v16, (a0)
 ; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2862,7 +2726,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2878,7 +2742,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2894,7 +2758,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2910,7 +2774,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2927,9 +2791,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2949,9 +2811,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle32.v v16, (a0)
 ; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -2991,7 +2851,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v9, zero
+; RV32-NEXT: vmv.v.i v9, 0
 ; RV32-NEXT: vredor.vs v8, v8, v9
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -3004,7 +2864,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v9, zero
+; RV64-NEXT: vmv.v.i v9, 0
 ; RV64-NEXT: vredor.vs v8, v8, v9
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -3020,7 +2880,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v10, zero
+; RV32-NEXT: vmv.v.i v10, 0
 ; RV32-NEXT: vredor.vs v8, v8, v10
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -3033,7 +2893,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v10, zero
+; RV64-NEXT: vmv.v.i v10, 0
 ; RV64-NEXT: vredor.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -3049,7 +2909,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v12, zero
+; RV32-NEXT: vmv.v.i v12, 0
 ; RV32-NEXT: vredor.vs v8, v8, v12
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -3062,7 +2922,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v12, zero
+; RV64-NEXT: vmv.v.i v12, 0
 ; RV64-NEXT: vredor.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -3078,7 +2938,7 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: vredor.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -3091,7 +2951,7 @@
 ; RV64: # %bb.0:
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vmv.s.x v16, zero
+; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vredor.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -3109,9 +2969,9 @@
 ; RV32-NEXT: vle64.v v8, (a0)
 ; RV32-NEXT: addi a0, a0, 128
 ; RV32-NEXT: vle64.v v16, (a0)
-; RV32-NEXT: vmv.s.x v24, zero
 ; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vredor.vs v8, v8, v24
+; RV32-NEXT: vmv.v.i v16, 0
+; RV32-NEXT: vredor.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -3125,9 +2985,9 @@
 ; RV64-NEXT: vle64.v v8, (a0)
 ; RV64-NEXT: addi a0, a0, 128
 ; RV64-NEXT: vle64.v v16, (a0)
-; RV64-NEXT: vmv.s.x v24, zero
 ; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vredor.vs v8, v8, v24
+; RV64-NEXT: vmv.v.i v16, 0
+; RV64-NEXT: vredor.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %v = load <32 x i64>, <32 x i64>* %x
@@ -3151,7 +3011,7 @@
 ; RV32-NEXT: vor.vv v16, v24, v16
 ; RV32-NEXT: vor.vv v8, v8, v0
 ; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vmv.v.i v16, 0
 ; RV32-NEXT: vredor.vs v8, v8, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
@@ -3173,7 +3033,7 @@
 ; RV64-NEXT: vor.vv v16, v24, v16
 ; RV64-NEXT: vor.vv v8, v8, v0
 ; RV64-NEXT: vor.vv v8, v8, v16
-; RV64-NEXT: vmv.s.x v16, zero
+; RV64-NEXT: vmv.v.i v16, 0
 ; RV64-NEXT: vredor.vs v8, v8, v16
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -3203,7 +3063,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3219,7 +3079,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3235,7 +3095,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3251,7 +3111,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3268,9 +3128,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3287,9 +3145,7 @@
 ; CHECK-NEXT: li a1, 64
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v12
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3306,9 +3162,7 @@
 ; CHECK-NEXT: li a1, 128
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3328,9 +3182,7 @@
 ; CHECK-NEXT: addi a0, a0, 128
 ; CHECK-NEXT: vle8.v v16, (a0)
 ; CHECK-NEXT: vxor.vv v8, v8, v16
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v16
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3360,7 +3212,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3376,7 +3228,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3392,7 +3244,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vmv.v.i v9, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v9
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3408,7 +3260,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vmv.v.i v10, 0
 ; CHECK-NEXT: vredxor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -3425,9 +3277,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8,
(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3444,9 +3294,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3466,9 +3314,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3498,7 +3344,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3514,7 +3360,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3530,7 +3376,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3546,7 +3392,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3563,9 +3409,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3585,9 +3429,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3627,7 +3469,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -3640,7 +3482,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3656,7 +3498,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v10, zero +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vredxor.vs v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 
32 @@ -3669,7 +3511,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v10, zero +; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: vredxor.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3685,7 +3527,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v12, zero +; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: vredxor.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -3698,7 +3540,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v12, zero +; RV64-NEXT: vmv.v.i v12, 0 ; RV64-NEXT: vredxor.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3714,7 +3556,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vredxor.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -3727,7 +3569,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vredxor.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3745,9 +3587,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmv.s.x v24, zero ; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vredxor.vs v8, v8, v24 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vredxor.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -3761,9 +3603,9 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmv.s.x v24, zero ; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: vredxor.vs v8, v8, v24 +; RV64-NEXT: vmv.v.i v16, 0 +; RV64-NEXT: vredxor.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -3787,7 +3629,7 @@ ; RV32-NEXT: vxor.vv v16, v24, v16 ; RV32-NEXT: vxor.vv v8, v8, v0 ; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vredxor.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -3809,7 +3651,7 @@ ; RV64-NEXT: vxor.vv v16, v24, v16 ; RV64-NEXT: vxor.vv v8, v8, v0 ; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vredxor.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3840,7 +3682,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3857,7 +3699,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3874,7 +3716,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3891,7 +3733,7 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; 
CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3909,9 +3751,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3929,9 +3769,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3949,9 +3787,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3972,9 +3808,7 @@ ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmin.vv v8, v8, v16 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4006,7 +3840,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4017,7 +3851,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4035,7 +3869,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4046,7 +3880,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4064,7 +3898,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4075,7 +3909,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4093,7 +3927,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v10, a0 +; RV32-NEXT: vmv.v.x v10, a0 ; RV32-NEXT: vredmin.vs v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4104,7 +3938,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vredmin.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4123,9 +3957,7 @@ ; RV32-NEXT: vle16.v v8, 
(a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v12, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RV32-NEXT: vmv.v.x v12, a0 ; RV32-NEXT: vredmin.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4137,9 +3969,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vredmin.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4158,9 +3988,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4172,9 +4000,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4196,9 +4022,7 @@ ; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4213,9 +4037,7 @@ ; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4247,7 +4069,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4258,7 +4080,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4276,7 +4098,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4287,7 +4109,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4305,7 +4127,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v10, a0 +; RV32-NEXT: vmv.v.x v10, a0 ; RV32-NEXT: vredmin.vs v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4316,7 +4138,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vredmin.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4334,7 +4156,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; 
RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v12, a0 +; RV32-NEXT: vmv.v.x v12, a0 ; RV32-NEXT: vredmin.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4345,7 +4167,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v12, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vredmin.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4364,9 +4186,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4378,9 +4198,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4402,9 +4220,7 @@ ; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v16, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4419,9 +4235,7 @@ ; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4469,9 +4283,7 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -4487,7 +4299,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4511,9 +4323,7 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -4529,7 +4339,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vredmin.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4553,9 +4363,7 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -4571,7 +4379,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v12, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vredmin.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ 
-4595,9 +4403,7 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -4613,7 +4419,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4638,12 +4444,10 @@ ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vredmin.vs v8, v8, v24 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -4658,11 +4462,11 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v24, a0 -; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: vredmin.vs v8, v8, v24 +; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -4676,8 +4480,13 @@ ; RV32-LABEL: vreduce_smin_v64i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 384 @@ -4689,19 +4498,22 @@ ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: vmin.vv v24, v0, v24 -; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: vmin.vv v8, v8, v24 ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmin.vs v8, v8, v16 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vmin.vv v24, v0, v24 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmin.vv v16, v0, v16 +; RV32-NEXT: vmin.vv v16, v16, v24 +; RV32-NEXT: vredmin.vs v8, v16, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add sp, sp, a2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -4720,7 +4532,7 @@ ; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4751,7 +4563,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4768,7 +4580,7 @@ ; CHECK-NEXT: 
vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4785,7 +4597,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4802,7 +4614,7 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4820,9 +4632,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4840,9 +4650,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4860,9 +4668,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4883,9 +4689,7 @@ ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4916,7 +4720,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4933,7 +4737,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4950,7 +4754,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4967,7 +4771,7 @@ ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4985,9 +4789,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5005,9 +4807,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5028,9 +4828,7 @@ ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5061,7 +4859,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5078,7 +4876,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5095,7 +4893,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5112,7 +4910,7 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5130,9 +4928,7 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5153,9 +4949,7 @@ ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5201,9 +4995,7 @@ ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5219,7 +5011,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmax.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5241,9 +5033,7 @@ ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vredmax.vs v8, 
v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5259,7 +5049,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vredmax.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5281,9 +5071,7 @@ ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5299,7 +5087,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v12, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vredmax.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5321,9 +5109,7 @@ ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5339,7 +5125,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmax.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5362,12 +5148,10 @@ ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: vmax.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v16 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vmax.vv v8, v8, v16 +; RV32-NEXT: vredmax.vs v8, v8, v24 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5382,11 +5166,11 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmax.vv v8, v8, v16 ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v24, a0 -; RV64-NEXT: vmax.vv v8, v8, v16 -; RV64-NEXT: vredmax.vs v8, v8, v24 +; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vredmax.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -5400,8 +5184,13 @@ ; RV32-LABEL: vreduce_smax_v64i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 384 @@ -5411,19 +5200,22 @@ ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: vmax.vv v24, v0, v24 -; RV32-NEXT: vmax.vv v8, v8, v16 -; RV32-NEXT: vmax.vv v8, v8, v24 ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vredmax.vs v8, v8, v16 +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: vmax.vv v24, v0, v24 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8re8.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmax.vv v16, v0, v16 +; RV32-NEXT: vmax.vv v16, v16, v24 +; 
RV32-NEXT: vredmax.vs v8, v16, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add sp, sp, a2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -5442,7 +5234,7 @@ ; RV64-NEXT: vmax.vv v8, v8, v16 ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vredmax.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5472,9 +5264,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5490,9 +5280,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5508,9 +5296,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5526,9 +5312,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5545,9 +5329,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5564,9 +5346,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5583,9 +5363,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5605,9 +5383,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5637,9 +5413,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ 
-5655,9 +5429,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5673,9 +5445,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5691,9 +5461,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5710,9 +5478,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5729,9 +5495,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5751,9 +5515,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5783,9 +5545,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5801,9 +5561,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5819,9 +5577,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, -1 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5837,9 +5593,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v12, -1 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5856,9 +5610,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: 
vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5878,9 +5630,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vminu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v16, -1 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5920,9 +5670,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5935,9 +5683,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5953,9 +5699,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5968,9 +5712,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v10, -1 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5986,9 +5728,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6001,9 +5741,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, -1 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6019,9 +5757,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6034,9 +5770,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6055,9 +5789,7 @@ ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: vminu.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6073,9 +5805,7 @@ ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, 
ma ; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6100,9 +5830,7 @@ ; RV32-NEXT: vminu.vv v16, v24, v16 ; RV32-NEXT: vminu.vv v8, v8, v0 ; RV32-NEXT: vminu.vv v8, v8, v16 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6124,9 +5852,7 @@ ; RV64-NEXT: vminu.vv v16, v24, v16 ; RV64-NEXT: vminu.vv v8, v8, v0 ; RV64-NEXT: vminu.vv v8, v8, v16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, -1 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6156,7 +5882,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6172,7 +5898,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6188,7 +5914,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6204,7 +5930,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6221,9 +5947,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6240,9 +5964,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6259,9 +5981,7 @@ ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6281,9 +6001,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6313,7 +6031,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: 
vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6329,7 +6047,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6345,7 +6063,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6361,7 +6079,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6378,9 +6096,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6397,9 +6113,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6419,9 +6133,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6451,7 +6163,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6467,7 +6179,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6483,7 +6195,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6499,7 +6211,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v12, zero +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v12 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6516,9 +6228,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6538,9 +6248,7 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vsetivli 
zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, zero -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6580,7 +6288,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6593,7 +6301,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6609,7 +6317,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v10, zero +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v10 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6622,7 +6330,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v10, zero +; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, v10 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6638,7 +6346,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v12, zero +; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v12 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6651,7 +6359,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v12, zero +; RV64-NEXT: vmv.v.i v12, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, v12 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6667,7 +6375,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6680,7 +6388,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6698,9 +6406,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmv.s.x v24, zero ; RV32-NEXT: vmaxu.vv v8, v8, v16 -; RV32-NEXT: vredmaxu.vs v8, v8, v24 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: vredmaxu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6714,9 +6422,9 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmv.s.x v24, zero ; RV64-NEXT: vmaxu.vv v8, v8, v16 -; RV64-NEXT: vredmaxu.vs v8, v8, v24 +; RV64-NEXT: vmv.v.i v16, 0 +; RV64-NEXT: vredmaxu.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -6740,7 +6448,7 @@ ; RV32-NEXT: vmaxu.vv v16, v24, v16 ; RV32-NEXT: vmaxu.vv v8, v8, v0 ; RV32-NEXT: vmaxu.vv v8, v8, v16 -; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6762,7 +6470,7 @@ ; RV64-NEXT: vmaxu.vv v16, v24, v16 ; RV64-NEXT: vmaxu.vv v8, v8, v0 ; RV64-NEXT: vmaxu.vv v8, v8, v16 -; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, 
v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll --- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -4,11 +4,11 @@ define i64 @reduce_add(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_add: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v) @@ -19,11 +19,11 @@ define i64 @reduce_add2(<4 x i64> %v) { ; CHECK-LABEL: reduce_add2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: addi a0, a0, 8 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v) @@ -34,11 +34,11 @@ define i64 @reduce_and(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_and: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vredand.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v) @@ -49,11 +49,11 @@ define i64 @reduce_and2(<4 x i64> %v) { ; CHECK-LABEL: reduce_and2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vredand.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: andi a0, a0, 8 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v) @@ -64,11 +64,11 @@ define i64 @reduce_or(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_or: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredor.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v) @@ -79,11 +79,11 @@ define i64 @reduce_or2(<4 x i64> %v) { ; CHECK-LABEL: reduce_or2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ori a0, a0, 8 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v) @@ -94,11 +94,11 @@ define i64 @reduce_xor(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_xor: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: xor a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 
@llvm.vector.reduce.xor.v4i64(<4 x i64> %v) @@ -109,9 +109,8 @@ define i64 @reduce_xor2(<4 x i64> %v) { ; CHECK-LABEL: reduce_xor2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: andi a0, a0, 8 @@ -125,11 +124,11 @@ define i64 @reduce_umax(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_umax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: maxu a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v) @@ -140,11 +139,12 @@ define i64 @reduce_umax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: maxu a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v) @@ -155,11 +155,11 @@ define i64 @reduce_umin(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_umin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: minu a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v) @@ -170,11 +170,12 @@ define i64 @reduce_umin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_umin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: minu a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v) @@ -185,11 +186,13 @@ define i64 @reduce_smax(i64 %x, <4 x i64> %v) { ; CHECK-LABEL: reduce_smax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: slli a1, a1, 63 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vredmax.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: max a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v) @@ -200,11 +203,14 @@ define i64 @reduce_smax2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smax2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: slli a0, a0, 63 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: max a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v) @@ -215,11 +221,13 @@ define i64 @reduce_smin(i64 %x, <4 x i64> %v) { ; 
CHECK-LABEL: reduce_smin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: srli a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vredmin.vs v8, v8, v10 -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: min a0, a1, a0 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v) @@ -230,11 +238,14 @@ define i64 @reduce_smin2(<4 x i64> %v) { ; CHECK-LABEL: reduce_smin2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 8 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v10 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: min a0, a0, a1 ; CHECK-NEXT: ret entry: %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v) @@ -245,11 +256,11 @@ define float @reduce_fadd(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret entry: %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v) @@ -259,11 +270,11 @@ define float @reduce_fadd2(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, ft0, fa0 ; CHECK-NEXT: ret entry: %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v) @@ -274,11 +285,13 @@ define float @reduce_fmax(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fmax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fmax.s fa0, fa0, ft0 ; CHECK-NEXT: ret entry: %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) @@ -289,11 +302,13 @@ define float @reduce_fmin(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fmin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fmin.s fa0, fa0, ft0 ; CHECK-NEXT: ret entry: %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -1,19 +1,21 
@@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>) define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v) ret half %red @@ -22,9 +24,8 @@ define half @vreduce_ord_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -37,11 +38,13 @@ define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v) ret half %red @@ -50,9 +53,8 @@ define half @vreduce_ord_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -65,11 +67,13 @@ define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v) ret half %red @@ -78,9 +82,8 @@ define half @vreduce_ord_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -93,11 +96,13 @@ define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v) ret float %red @@ -106,9 +111,8 @@ define float @vreduce_ord_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -119,12 +123,15 @@ define float @vreduce_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %e = fpext <vscale x 1 x half> %v to <vscale x 1 x float> %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %e) @@ -134,9 +141,9 @@ define float @vreduce_ord_fwadd_nxv1f32(<vscale x 1 x half> %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -151,11 +158,13 @@ define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v) ret float %red @@ -164,9 +173,8 @@ define float @vreduce_ord_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -177,12 +185,15 @@ define float @vreduce_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %e = fpext <vscale x 2 x half> %v to <vscale x 2 x float> %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %e) @@ -192,9 +203,9 @@ define float @vreduce_ord_fwadd_nxv2f32(<vscale x 2 x half> %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -209,11 +220,13 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v) ret float %red @@ -222,9 +235,8 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -235,12 +247,15 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwredusum.vs v8, v8, v9 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwredusum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %e = fpext <vscale x 4 x half> %v to <vscale x 4 x float> %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %e) @@ -250,10 +265,10 @@ define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwredosum.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwredosum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -265,14 +280,27 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>) define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) { -; CHECK-LABEL: vreduce_fadd_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fadd_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v9, ft0 +; RV32-NEXT: vfredusum.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fadd_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v9, ft0 +; RV64-NEXT: vfredusum.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v) ret double %red } @@ -280,9 +308,8 @@ define double @vreduce_ord_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -291,15 +318,31 @@ } define double @vreduce_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) { -; CHECK-LABEL: vreduce_fwadd_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwredusum.vs v8, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fwadd_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v9, ft0 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwredusum.vs v8, v8, v9 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fwadd_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v9, ft0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwredusum.vs v8, v8, v9 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %e = fpext <vscale x 1 x float> %v to <vscale x 1 x double> %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %e) ret double %red @@ -308,9 +351,9 @@ define double @vreduce_ord_fwadd_nxv1f64(<vscale x 1 x float> %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -323,14 +366,27 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>) define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) { -; CHECK-LABEL: vreduce_fadd_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fadd_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vfmv.v.f v10, ft0 +; RV32-NEXT: vfredusum.vs v8, v8, v10 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fadd_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64-NEXT: vfmv.v.f v10, ft0 +; RV64-NEXT: vfredusum.vs v8, v8, v10 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v) ret double %red } @@ -338,9 +394,8 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -349,15 +404,31 @@ } define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) { -; CHECK-LABEL: vreduce_fwadd_nxv2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwredusum.vs v8, v8, v9 -; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fwadd_nxv2f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32-NEXT: vfmv.v.f v10, ft0 +; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32-NEXT: vfwredusum.vs v8, v8, v10 +; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fwadd_nxv2f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64-NEXT: vfmv.v.f v10, ft0 +; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64-NEXT: vfwredusum.vs v8, v8, v10 +; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %e = fpext <vscale x 2 x float> %v to <vscale x 2 x double> %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %e) ret double %red @@ -366,10 +437,10 @@ define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfwredosum.vs v8, v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfwredosum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -381,14 +452,27 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>) define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) { -; CHECK-LABEL: vreduce_fadd_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v12 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fadd_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfmv.v.f v12, ft0 +; RV32-NEXT: vfredusum.vs v8, v8, v12 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fadd_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfmv.v.f v12, ft0 +; RV64-NEXT: vfredusum.vs v8, v8, v12 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v) ret double %red } @@ -396,9 +480,8 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -407,15 +490,31 @@ } define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) { -; CHECK-LABEL: vreduce_fwadd_nxv4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfwredusum.vs v8, v8, v10 -; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fwadd_nxv4f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32-NEXT: vfmv.v.f v12, ft0 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vfwredusum.vs v8, v8, v12 +; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fwadd_nxv4f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vfmv.v.f v12, ft0 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vfwredusum.vs v8, v8, v12 +; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %e = fpext <vscale x 4 x float> %v to <vscale x 4 x double> %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %e) ret double %red @@ -424,10 +523,10 @@ define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfwredosum.vs v8, v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfwredosum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret
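The unordered (reassoc) fadd checks above all follow one shape; a minimal, fully-typed IR sketch of that pattern is given below for orientation (the function name @sketch and the comments are illustrative and not part of the test files):

; Sketch of the pattern exercised by the vreduce_fadd_* tests above.
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)

define float @sketch(<vscale x 2 x float> %v, float %s) {
  ; With this patch the accumulator operand of vfredusum is a splat of
  ; the neutral element (-0.0 for fadd) built at the full container
  ; type, so the old single-element vsetivli/vfmv.s.f sequence goes
  ; away; the start value %s is folded back in with one scalar fadd.
  %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v)
  ret float %red
}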
@@ -443,9 +542,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI30_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -458,9 +556,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI31_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -473,9 +570,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI32_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -490,9 +586,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI33_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -507,9 +602,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI34_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -523,13 +617,11 @@ ; CHECK-LABEL: vreduce_fmin_nxv64f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI35_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vlse16.v v24, (a0), zero +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v) @@ -543,9 +635,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI36_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -558,9 +649,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI37_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -573,9 +663,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI38_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -590,9 +679,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI39_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -607,9 +695,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI40_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI40_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vlse32.v v10, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -623,13 +710,11 @@ ; CHECK-LABEL: vreduce_fmin_nxv32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI41_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI41_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vlse32.v v24, (a0), zero +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v) @@ -643,9 +728,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI42_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -658,9 +742,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI43_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -673,9 +756,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI44_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -690,9 +772,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI45_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI45_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -707,9 +788,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -723,13 +803,11 @@ ; CHECK-LABEL: vreduce_fmin_nxv16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI47_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI47_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vlse64.v v24, (a0), zero +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfredmin.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v) @@ -743,9 +821,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI48_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -758,9 +835,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI49_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -773,9 +849,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI50_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -790,9 +865,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI51_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -807,9 +881,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI52_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -823,13 +896,11 @@ ; CHECK-LABEL: vreduce_fmax_nxv64f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI53_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI53_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vlse16.v v16, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vlse16.v v24, (a0), zero +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v) @@ -843,9 +914,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI54_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -858,9 +928,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI55_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -873,9 +942,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI56_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -890,9 +958,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI57_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -907,9 +974,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI58_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI58_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vlse32.v v10, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -923,13 +989,11 @@ ; CHECK-LABEL: vreduce_fmax_nxv32f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI59_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI59_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vlse32.v v16, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vlse32.v v24, (a0), zero +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v) @@ -943,9 +1007,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI60_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -958,9 +1021,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI61_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -973,9 +1035,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI62_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -990,9 +1051,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI63_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI63_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vlse64.v v10, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1007,9 +1067,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI64_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI64_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vlse64.v v12, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1023,13 +1082,11 @@ ; CHECK-LABEL: vreduce_fmax_nxv16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: lui a0, %hi(.LCPI65_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI65_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vlse64.v v16, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vlse64.v v24, (a0), zero +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfredmax.vs v8, v8, v24 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v) @@ -1039,11 +1096,11 @@ define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) { ; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v) ret float %red @@ -1066,9 +1123,8 @@ ; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1090,9 +1146,8 @@ ; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1118,9 +1173,8 @@ ; CHECK-NEXT: vslideup.vi v11, v12, 0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1133,13 +1187,12 @@ define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv12f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: fmv.h.x ft0, zero ; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v11, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1162,11 +1215,10 @@ ; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v) ret half %red @@ -1184,11 +1236,11 @@ ; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v) ret half %red @@ -1212,9 +1264,8 @@ ; CHECK-NEXT: vslideup.vi v11, v12, 0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1229,11 +1280,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI74_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v11, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret
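The predicated (VP) reductions changed below follow the same scheme; for reference, a fully-typed sketch of one such intrinsic call, with illustrative names and comments:

; Sketch of the vp.reduce pattern: only the first %evl lanes of %v,
; under mask %m, participate in the reduction seeded by %s.
declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32)

define half @sketch_vp(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  ; The new lowering splats %s at the container type under a single
  ; tail-agnostic vsetvli keyed to %evl, replacing the old
  ; vsetivli/vfmv.s.f pair and the tail-undisturbed (tu) reduction.
  %r = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret half %r
}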
vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v9, v10, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v10 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16(half %s, %v) ret half %red @@ -1212,9 +1264,8 @@ ; CHECK-NEXT: vslideup.vi v11, v12, 0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v11, v12, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, ft0 ; CHECK-NEXT: vfredmin.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1229,11 +1280,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI74_0) ; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v11, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, ft0 ; CHECK-NEXT: vfredmax.vs v8, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -9,11 +9,10 @@ define half @vpreduce_fadd_nxv1f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, %m, i32 %evl) ret half %r @@ -22,11 +21,10 @@ define half @vpreduce_ord_fadd_nxv1f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, %m, i32 %evl) ret half %r @@ -37,11 +35,10 @@ define half @vpreduce_fadd_nxv2f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, %m, i32 %evl) ret half %r @@ -50,11 +47,10 @@ define half @vpreduce_ord_fadd_nxv2f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vpreduce_ord_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, %m, i32 %evl) ret half %r @@ -65,11 +61,10 @@ define half @vpreduce_fadd_nxv4f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, %m, i32 %evl) ret half %r @@ -78,11 +73,10 @@ define half @vpreduce_ord_fadd_nxv4f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, %m, i32 %evl) ret half %r @@ -96,25 +90,23 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: vslidedown.vx v1, v0, a2 ; CHECK-NEXT: slli a2, a1, 2 ; CHECK-NEXT: sub a1, a0, a2 ; CHECK-NEXT: sltu a3, a0, a1 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: bltu a0, a2, .LBB6_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v24, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v24, v0.t +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -128,25 +120,23 @@ ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a2 +; CHECK-NEXT: vslidedown.vx v1, v0, a2 ; CHECK-NEXT: slli a2, a1, 2 ; CHECK-NEXT: sub a1, a0, a2 ; CHECK-NEXT: sltu a3, a0, a1 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; 
-; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v24, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v24, v0.t
+; CHECK-NEXT: vfmv.f.s ft0, v8
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, ft0
+; CHECK-NEXT: vmv1r.v v0, v1
 ; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -159,11 +149,10 @@
 define float @vpreduce_fadd_nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv1f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
-; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v9, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret float %r
@@ -172,11 +161,10 @@
 define float @vpreduce_ord_fadd_nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv1f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
-; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v9, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret float %r
@@ -187,11 +175,10 @@
 define float @vpreduce_fadd_nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv2f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v9, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret float %r
@@ -200,11 +187,10 @@
 define float @vpreduce_ord_fadd_nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv2f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v9, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret float %r
@@ -215,11 +201,10 @@
 define float @vpreduce_fadd_nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v10, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret float %r
@@ -228,11 +213,10 @@
 define float @vpreduce_ord_fadd_nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v10, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret float %r
@@ -243,11 +227,10 @@
 define double @vpreduce_fadd_nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv1f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
-; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v9, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret double %r
@@ -256,11 +239,10 @@
 define double @vpreduce_ord_fadd_nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv1f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
-; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v9
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v9, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret double %r
@@ -271,11 +253,10 @@
 define double @vpreduce_fadd_nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv2f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
-; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v10, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret double %r
@@ -284,11 +265,10 @@
 define double @vpreduce_ord_fadd_nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv2f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
-; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v10, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret double %r
@@ -299,11 +279,10 @@
 define double @vpreduce_fadd_nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv3f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
-; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v12
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v12, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v12, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc double @llvm.vp.reduce.fadd.nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 %evl)
 ret double %r
@@ -312,11 +291,10 @@
 define double @vpreduce_ord_fadd_nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv3f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
-; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v12
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v12, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v12, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call double @llvm.vp.reduce.fadd.nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 %evl)
 ret double %r
@@ -327,11 +305,10 @@
 define double @vpreduce_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
-; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v12
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v12, fa0
+; CHECK-NEXT: vfredusum.vs v8, v8, v12, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret double %r
@@ -340,11 +317,10 @@
 define double @vpreduce_ord_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
-; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v12
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v12, fa0
+; CHECK-NEXT: vfredosum.vs v8, v8, v12, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
 %r = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret double %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -9,11 +9,10 @@
 define signext i8 @vpreduce_add_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.add.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -24,12 +23,10 @@
 define signext i8 @vpreduce_umax_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -40,11 +37,10 @@
 define signext i8 @vpreduce_smax_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -55,12 +51,10 @@
 define signext i8 @vpreduce_umin_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umin_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -71,11 +65,10 @@
 define signext i8 @vpreduce_smin_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -86,11 +79,10 @@
 define signext i8 @vpreduce_and_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.and.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -101,11 +93,10 @@
 define signext i8 @vpreduce_or_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.or.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -116,11 +107,10 @@
 define signext i8 @vpreduce_xor_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv1i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i8 %r
@@ -131,11 +121,10 @@
 define signext i8 @vpreduce_add_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.add.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -146,12 +135,10 @@
 define signext i8 @vpreduce_umax_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -162,11 +149,10 @@
 define signext i8 @vpreduce_smax_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -177,12 +163,10 @@
 define signext i8 @vpreduce_umin_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umin_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -193,11 +177,10 @@
 define signext i8 @vpreduce_smin_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -208,11 +191,10 @@
 define signext i8 @vpreduce_and_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.and.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -223,11 +205,10 @@
 define signext i8 @vpreduce_or_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.or.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -238,11 +219,10 @@
 define signext i8 @vpreduce_xor_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv2i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -253,11 +233,10 @@
 define signext i8 @vpreduce_smax_nxv3i8(i8 signext %s, <vscale x 3 x i8> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv3i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smax.nxv3i8(i8 %s, <vscale x 3 x i8> %v, <vscale x 3 x i1> %m, i32 %evl)
 ret i8 %r
@@ -268,11 +247,10 @@
 define signext i8 @vpreduce_add_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.add.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -283,12 +261,10 @@
 define signext i8 @vpreduce_umax_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -299,11 +275,10 @@
 define signext i8 @vpreduce_smax_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -314,12 +289,10 @@
 define signext i8 @vpreduce_umin_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umin_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -330,11 +303,10 @@
 define signext i8 @vpreduce_smin_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -345,11 +317,10 @@
 define signext i8 @vpreduce_and_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.and.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -360,11 +331,10 @@
 define signext i8 @vpreduce_or_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.or.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -375,11 +345,10 @@
 define signext i8 @vpreduce_xor_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv4i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -390,11 +359,10 @@
 define signext i16 @vpreduce_add_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.add.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
@@ -403,27 +371,13 @@
 declare i16 @llvm.vp.reduce.umax.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 
 define signext i16 @vpreduce_umax_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
 }
@@ -433,11 +387,10 @@
 define signext i16 @vpreduce_smax_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
@@ -446,27 +399,13 @@
 declare i16 @llvm.vp.reduce.umin.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 
 define signext i16 @vpreduce_umin_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
 }
@@ -476,11 +415,10 @@
 define signext i16 @vpreduce_smin_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
@@ -491,11 +429,10 @@
 define signext i16 @vpreduce_and_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.and.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
@@ -506,11 +443,10 @@
 define signext i16 @vpreduce_or_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.or.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
@@ -521,11 +457,10 @@
 define signext i16 @vpreduce_xor_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv1i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i16 %r
@@ -536,11 +471,10 @@
 define signext i16 @vpreduce_add_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.add.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
@@ -549,27 +483,13 @@
 declare i16 @llvm.vp.reduce.umax.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 
 define signext i16 @vpreduce_umax_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
 }
@@ -579,11 +499,10 @@
 define signext i16 @vpreduce_smax_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
@@ -592,27 +511,13 @@
 declare i16 @llvm.vp.reduce.umin.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 
 define signext i16 @vpreduce_umin_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
 }
@@ -622,11 +527,10 @@
 define signext i16 @vpreduce_smin_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
@@ -637,11 +541,10 @@
 define signext i16 @vpreduce_and_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.and.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
@@ -652,11 +555,10 @@
 define signext i16 @vpreduce_or_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.or.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
@@ -667,11 +569,10 @@
 define signext i16 @vpreduce_xor_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv2i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i16 %r
@@ -682,11 +583,10 @@
 define signext i16 @vpreduce_add_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.add.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
@@ -695,27 +595,13 @@
 declare i16 @llvm.vp.reduce.umax.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 
 define signext i16 @vpreduce_umax_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
 }
@@ -725,11 +611,10 @@
 define signext i16 @vpreduce_smax_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
@@ -738,27 +623,13 @@
 declare i16 @llvm.vp.reduce.umin.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 
 define signext i16 @vpreduce_umin_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
 }
@@ -768,11 +639,10 @@
 define signext i16 @vpreduce_smin_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
@@ -783,11 +653,10 @@
 define signext i16 @vpreduce_and_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.and.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
@@ -798,11 +667,10 @@
 define signext i16 @vpreduce_or_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.or.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
@@ -813,11 +681,10 @@
 define signext i16 @vpreduce_xor_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i16 %r
@@ -828,11 +695,10 @@
 define signext i32 @vpreduce_add_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv1i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.add.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
@@ -841,25 +707,13 @@
 declare i32 @llvm.vp.reduce.umax.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 
 define signext i32 @vpreduce_umax_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -869,11 +723,10 @@
 define signext i32 @vpreduce_smax_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv1i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
@@ -882,25 +735,13 @@
 declare i32 @llvm.vp.reduce.umin.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
 
 define signext i32 @vpreduce_umin_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -910,11 +751,10 @@
 define signext i32 @vpreduce_smin_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv1i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
@@ -925,11 +765,10 @@
 define signext i32 @vpreduce_and_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv1i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.and.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
@@ -940,11 +779,10 @@
 define signext i32 @vpreduce_or_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv1i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.or.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
@@ -955,11 +793,10 @@
 define signext i32 @vpreduce_xor_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv1i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i32 %r
@@ -970,11 +807,10 @@
 define signext i32 @vpreduce_add_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.add.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
@@ -983,25 +819,13 @@
 declare i32 @llvm.vp.reduce.umax.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 
 define signext i32 @vpreduce_umax_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -1011,11 +835,10 @@
 define signext i32 @vpreduce_smax_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1024,25 +847,13 @@
 declare i32 @llvm.vp.reduce.umin.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 
 define signext i32 @vpreduce_umin_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -1052,11 +863,10 @@
 define signext i32 @vpreduce_smin_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1067,11 +877,10 @@
 define signext i32 @vpreduce_and_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.and.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1082,11 +891,10 @@
 define signext i32 @vpreduce_or_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.or.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1097,11 +905,10 @@
 define signext i32 @vpreduce_xor_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv2i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
-; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1112,11 +919,10 @@
 define signext i32 @vpreduce_add_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; CHECK-NEXT: vredsum.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredsum.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.add.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1125,25 +931,13 @@
 declare i32 @llvm.vp.reduce.umax.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 
 define signext i32 @vpreduce_umax_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -1151,63 +945,31 @@
 declare i32 @llvm.vp.reduce.umax.nxv32i32(i32, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
 
 define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv32i32:
-; RV32: # %bb.0:
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: srli a3, a2, 2
-; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vx v24, v0, a3
-; RV32-NEXT: slli a3, a2, 1
-; RV32-NEXT: sub a2, a1, a3
-; RV32-NEXT: sltu a4, a1, a2
-; RV32-NEXT: addi a4, a4, -1
-; RV32-NEXT: and a2, a4, a2
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v25, a0
-; RV32-NEXT: bltu a1, a3, .LBB67_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB67_2:
-; RV32-NEXT: vsetvli zero, a1, e32, m8, tu, ma
-; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t
-; RV32-NEXT: vmv.x.s a0, v25
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma
-; RV32-NEXT: vmv1r.v v0, v24
-; RV32-NEXT: vredmaxu.vs v8, v16, v8, v0.t
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv32i32:
-; RV64: # %bb.0:
-; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: srli a2, a3, 2
-; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vx v24, v0, a2
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a2, a0, 32
-; RV64-NEXT: slli a3, a3, 1
-; RV64-NEXT: sub a0, a1, a3
-; RV64-NEXT: sltu a4, a1, a0
-; RV64-NEXT: addi a4, a4, -1
-; RV64-NEXT: and a0, a4, a0
-; RV64-NEXT: bltu a1, a3, .LBB67_2
-; RV64-NEXT: # %bb.1:
-; RV64-NEXT: mv a1, a3
-; RV64-NEXT: .LBB67_2:
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v25, a2
-; RV64-NEXT: vsetvli zero, a1, e32, m8, tu, ma
-; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t
-; RV64-NEXT: vmv.x.s a1, v25
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsetvli zero, a0, e32, m8, tu, ma
-; RV64-NEXT: vmv1r.v v0, v24
-; RV64-NEXT: vredmaxu.vs v8, v16, v8, v0.t
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a3, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v1, v0, a3
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: sub a2, a1, a3
+; CHECK-NEXT: sltu a4, a1, a2
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a2, a4, a2
+; CHECK-NEXT: bltu a1, a3, .LBB67_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB67_2:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a0
+; CHECK-NEXT: vredmaxu.vs v8, v8, v24, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vredmaxu.vs v8, v16, v8, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -1217,11 +979,10 @@
 define signext i32 @vpreduce_smax_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smax_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; CHECK-NEXT: vredmax.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredmax.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1230,25 +991,13 @@
 declare i32 @llvm.vp.reduce.umin.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 
 define signext i32 @vpreduce_umin_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
-; RV64-NEXT: vmv.x.s a0, v10
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredminu.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
 }
@@ -1258,11 +1007,10 @@
 define signext i32 @vpreduce_smin_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; CHECK-NEXT: vredmin.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredmin.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1273,11 +1021,10 @@
 define signext i32 @vpreduce_and_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; CHECK-NEXT: vredand.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredand.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.and.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1288,11 +1035,10 @@
 define signext i32 @vpreduce_or_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; CHECK-NEXT: vredor.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredor.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.or.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1303,11 +1049,10 @@
 define signext i32 @vpreduce_xor_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
-; CHECK-NEXT: vredxor.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmv.x.s a0, v10
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vredxor.vs v8, v8, v10, v0.t
+; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1323,25 +1068,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredsum.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_add_nxv1i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredsum.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1355,27 +1098,26 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e32, mf2, tu, ma
-; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t
+; RV32-NEXT: vwredsum.vs v8, v8, v9, v0.t
 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpwreduce_add_nxv1i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v9, v0.t
 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, <vscale x 1 x i64> %e, <vscale x 1 x i1> %m, i32 %evl)
@@ -1390,27 +1132,26 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e32, mf2, tu, ma
-; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t
+; RV32-NEXT: vwredsum.vs v8, v8, v9, v0.t
 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpwreduce_uadd_nxv1i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
-; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v9, v0.t
 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
 %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, <vscale x 1 x i64> %e, <vscale x 1 x i1> %m, i32 %evl)
@@ -1427,25 +1168,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_umax_nxv1i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredmaxu.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1461,25 +1200,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredmax.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_smax_nxv1i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredmax.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1495,25 +1232,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredminu.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vsrl.vx v8, v9, a1
+; RV32-NEXT: vsrl.vx v8, v8, a1
 ; RV32-NEXT: vmv.x.s a1, v8
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_umin_nxv1i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vredminu.vs v8, v8, v9, v0.t
+; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
 %r = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
 ret i64 %r
@@ -1529,25 +1264,23 @@
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
-; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: vredmin.vs v8, v8, v9, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: li a1, 32
RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smin_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmin.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1563,25 +1296,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredand.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredand.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_and_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredand.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredand.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1597,25 +1328,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredor.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredor.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_or_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredor.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredor.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1631,25 +1360,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredxor.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi 
sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_xor_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredxor.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1665,25 +1392,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredsum.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_add_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredsum.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1697,27 +1422,26 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma -; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vwredsum.vs v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vwpreduce_add_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v10, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, %e, %m, i32 %evl) @@ -1732,27 +1456,26 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma -; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vwredsum.vs v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: 
vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vwpreduce_uadd_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v10, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, %e, %m, i32 %evl) @@ -1769,25 +1492,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1803,25 +1524,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredmax.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smax_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredmax.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1837,25 +1556,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli 
zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umin_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1871,25 +1588,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredmin.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smin_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredmin.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1905,25 +1620,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredand.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredand.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_and_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredand.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredand.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1939,25 +1652,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredor.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredor.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s 
a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_or_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredor.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredor.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1973,25 +1684,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vredxor.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_xor_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v10, a0 +; RV64-NEXT: vredxor.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2007,25 +1716,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredsum.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredsum.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_add_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredsum.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredsum.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2039,27 +1746,26 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma -; RV32-NEXT: vwredsum.vs v10, v8, v10, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vwredsum.vs v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 
16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpwreduce_add_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV64-NEXT: vwredsum.vs v10, v8, v10, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v12, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, %e, %m, i32 %evl) @@ -2074,27 +1780,26 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma -; RV32-NEXT: vwredsumu.vs v10, v8, v10, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vwredsumu.vs v8, v8, v12, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpwreduce_uadd_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV64-NEXT: vwredsumu.vs v10, v8, v10, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vwredsumu.vs v8, v8, v12, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = zext %v to %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, %e, %m, i32 %evl) @@ -2111,25 +1816,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredmaxu.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredmaxu.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredmaxu.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredmaxu.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2145,25 +1848,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredmax.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredmax.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 
; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smax_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredmax.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredmax.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2179,25 +1880,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredminu.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredminu.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umin_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredminu.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredminu.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2213,25 +1912,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredmin.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredmin.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smin_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredmin.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredmin.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2247,25 +1944,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredand.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredand.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; 
RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_and_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredand.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredand.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2281,25 +1976,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredor.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredor.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_or_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredor.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredor.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2315,25 +2008,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredxor.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vredxor.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_xor_nxv4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredxor.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vredxor.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -9,9 +9,8 @@ define signext i8 @vreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -24,9 +23,8 @@ define signext i8 @vreduce_umax_nxv1i8( %v) { ; CHECK-LABEL: 
vreduce_umax_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -40,9 +38,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -55,9 +52,8 @@ define signext i8 @vreduce_umin_nxv1i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -71,9 +67,8 @@ ; CHECK-LABEL: vreduce_smin_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -86,9 +81,8 @@ define signext i8 @vreduce_and_nxv1i8( %v) { ; CHECK-LABEL: vreduce_and_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -101,9 +95,8 @@ define signext i8 @vreduce_or_nxv1i8( %v) { ; CHECK-LABEL: vreduce_or_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -116,9 +109,8 @@ define signext i8 @vreduce_xor_nxv1i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -131,9 +123,8 @@ define signext i8 @vreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -146,9 +137,8 @@ define signext i8 @vreduce_umax_nxv2i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -162,9 +152,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: 
vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -177,9 +166,8 @@ define signext i8 @vreduce_umin_nxv2i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -193,9 +181,8 @@ ; CHECK-LABEL: vreduce_smin_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -208,9 +195,8 @@ define signext i8 @vreduce_and_nxv2i8( %v) { ; CHECK-LABEL: vreduce_and_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -223,9 +209,8 @@ define signext i8 @vreduce_or_nxv2i8( %v) { ; CHECK-LABEL: vreduce_or_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -238,9 +223,8 @@ define signext i8 @vreduce_xor_nxv2i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -253,9 +237,8 @@ define signext i8 @vreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -268,9 +251,8 @@ define signext i8 @vreduce_umax_nxv4i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -284,9 +266,8 @@ ; CHECK-LABEL: vreduce_smax_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -299,9 +280,8 @@ define signext i8 @vreduce_umin_nxv4i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -315,9 +295,8 @@ ; CHECK-LABEL: vreduce_smin_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; 
CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -330,9 +309,8 @@ define signext i8 @vreduce_and_nxv4i8( %v) { ; CHECK-LABEL: vreduce_and_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -345,9 +323,8 @@ define signext i8 @vreduce_or_nxv4i8( %v) { ; CHECK-LABEL: vreduce_or_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -360,9 +337,8 @@ define signext i8 @vreduce_xor_nxv4i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -375,9 +351,8 @@ define signext i16 @vreduce_add_nxv1i16( %v) { ; CHECK-LABEL: vreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -388,9 +363,9 @@ define signext i16 @vwreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -403,9 +378,9 @@ define signext i16 @vwreduce_uadd_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -420,9 +395,8 @@ define signext i16 @vreduce_umax_nxv1i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -436,9 +410,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -451,9 +424,8 @@ define signext i16 @vreduce_umin_nxv1i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; 
CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -468,9 +440,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -479,9 +450,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -494,9 +464,8 @@ define signext i16 @vreduce_and_nxv1i16( %v) { ; CHECK-LABEL: vreduce_and_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -509,9 +478,8 @@ define signext i16 @vreduce_or_nxv1i16( %v) { ; CHECK-LABEL: vreduce_or_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -524,9 +492,8 @@ define signext i16 @vreduce_xor_nxv1i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -539,9 +506,8 @@ define signext i16 @vreduce_add_nxv2i16( %v) { ; CHECK-LABEL: vreduce_add_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -552,9 +518,9 @@ define signext i16 @vwreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -567,9 +533,9 @@ define signext i16 @vwreduce_uadd_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -584,9 +550,8 @@ define signext i16 @vreduce_umax_nxv2i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; 
CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -600,9 +565,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -615,9 +579,8 @@ define signext i16 @vreduce_umin_nxv2i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -632,9 +595,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -643,9 +605,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -658,9 +619,8 @@ define signext i16 @vreduce_and_nxv2i16( %v) { ; CHECK-LABEL: vreduce_and_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -673,9 +633,8 @@ define signext i16 @vreduce_or_nxv2i16( %v) { ; CHECK-LABEL: vreduce_or_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -688,9 +647,8 @@ define signext i16 @vreduce_xor_nxv2i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -703,9 +661,8 @@ define signext i16 @vreduce_add_nxv4i16( %v) { ; CHECK-LABEL: vreduce_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -716,9 +673,9 @@ define signext i16 @vwreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli 
zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -731,9 +688,9 @@ define signext i16 @vwreduce_uadd_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -748,9 +705,8 @@ define signext i16 @vreduce_umax_nxv4i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -764,9 +720,8 @@ ; CHECK-LABEL: vreduce_smax_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -779,9 +734,8 @@ define signext i16 @vreduce_umin_nxv4i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -796,9 +750,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -807,9 +760,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -822,9 +774,8 @@ define signext i16 @vreduce_and_nxv4i16( %v) { ; CHECK-LABEL: vreduce_and_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -837,9 +788,8 @@ define signext i16 @vreduce_or_nxv4i16( %v) { ; CHECK-LABEL: vreduce_or_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -852,9 +802,8 @@ define signext i16 @vreduce_xor_nxv4i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -867,9 +816,8 @@ define signext i32 
@vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -880,9 +828,9 @@
define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -895,9 +843,9 @@
define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -912,9 +860,8 @@
define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -928,9 +875,8 @@
; CHECK-LABEL: vreduce_smax_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -943,9 +889,8 @@
define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -960,9 +905,8 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
@@ -971,9 +915,8 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -986,9 +929,8 @@
define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1001,9 +943,8 @@
define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1016,9 +957,8 @@
define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1031,9 +971,8 @@
define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredsum.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1044,9 +983,9 @@
define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -1059,9 +998,9 @@
define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -1076,9 +1015,8 @@
define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredmaxu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1092,9 +1030,8 @@
; CHECK-LABEL: vreduce_smax_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
; CHECK-NEXT: vredmax.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1107,9 +1044,8 @@
define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredminu.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1124,9 +1060,8 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
@@ -1135,9 +1070,8 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1150,9 +1084,8 @@
define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, -1
; CHECK-NEXT: vredand.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1165,9 +1098,8 @@
define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1180,9 +1112,8 @@
define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1195,9 +1126,8 @@
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1208,10 +1138,10 @@
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwredsum.vs v8, v8, v9
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1223,10 +1153,10 @@
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwredsumu.vs v8, v8, v9
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1240,9 +1170,8 @@
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1256,9 +1185,8 @@
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: vredmax.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1271,9 +1199,8 @@
define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umin_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1288,9 +1215,8 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
; RV32-NEXT: vredmin.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
@@ -1299,9 +1225,8 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
; RV64-NEXT: vredmin.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1314,9 +1239,8 @@
define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_and_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -1
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1329,9 +1253,8 @@
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vredor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1344,9 +1267,8 @@
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1359,9 +1281,8 @@
define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vredsum.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1372,9 +1293,8 @@
;
; RV64-LABEL: vreduce_add_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, 0
; RV64-NEXT: vredsum.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1385,9 +1305,9 @@
define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv1i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32-NEXT: vwredsum.vs v8, v8, v9
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
@@ -1399,9 +1319,9 @@
;
; RV64-LABEL: vwreduce_add_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, 0
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1414,9 +1334,9 @@
define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv1i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
+; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32-NEXT: vwredsumu.vs v8, v8, v9
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
@@ -1428,9 +1348,9 @@
;
; RV64-LABEL: vwreduce_uadd_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, 0
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1445,9 +1365,8 @@
define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vredmaxu.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1458,9 +1377,8 @@
;
; RV64-LABEL: vreduce_umax_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, 0
; RV64-NEXT: vredmaxu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1479,9 +1397,8 @@
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: sw zero, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT: vredmax.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1495,9 +1412,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vredmax.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1510,9 +1426,8 @@
define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.v.i v9, -1
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, -1
; RV32-NEXT: vredminu.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1523,12 +1438,11 @@
;
; RV64-LABEL: vreduce_umin_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredminu.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: ret
+; RV64-NEXT: ret
%red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
ret i64 %red
}
@@ -1546,9 +1460,8 @@
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
-; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1562,9 +1475,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
; RV64-NEXT: vredmin.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1577,9 +1489,8 @@
define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.v.i v9, -1
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, -1
; RV32-NEXT: vredand.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1590,9 +1501,8 @@
;
; RV64-LABEL: vreduce_and_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, -1
; RV64-NEXT: vredand.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1605,9 +1515,8 @@
define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vredor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1618,9 +1527,8 @@
;
; RV64-LABEL: vreduce_or_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, 0
; RV64-NEXT: vredor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1633,9 +1541,8 @@
define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv1i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1646,9 +1553,8 @@
;
; RV64-LABEL: vreduce_xor_nxv1i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.i v9, 0
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1661,9 +1567,8 @@
define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: vredsum.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1674,9 +1579,8 @@
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: vredsum.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1687,10 +1591,10 @@
define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv2i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vwredsum.vs v8, v8, v9
+; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vwredsum.vs v8, v8, v10
; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1701,10 +1605,10 @@
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT: vwredsum.vs v8, v8, v9
+; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v10
; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1716,10 +1620,10 @@
define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv2i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV32-NEXT: vwredsumu.vs v8, v8, v9
+; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vwredsumu.vs v8, v8, v10
; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1730,10 +1634,10 @@
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; RV64-NEXT: vwredsumu.vs v8, v8, v9
+; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64-NEXT: vwredsumu.vs v8, v8, v10
; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1747,9 +1651,8 @@
define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: vredmaxu.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1760,9 +1663,8 @@
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: vredmaxu.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1781,9 +1683,8 @@
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: sw zero, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vredmax.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1797,9 +1698,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
; RV64-NEXT: vredmax.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1812,9 +1712,8 @@
define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, -1
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, -1
; RV32-NEXT: vredminu.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1825,9 +1724,8 @@
;
; RV64-LABEL: vreduce_umin_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vredminu.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1848,9 +1746,8 @@
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1864,9 +1761,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
; RV64-NEXT: vredmin.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1879,9 +1775,8 @@
define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.v.i v10, -1
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, -1
; RV32-NEXT: vredand.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1892,9 +1787,8 @@
;
; RV64-LABEL: vreduce_and_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, -1
; RV64-NEXT: vredand.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1907,9 +1801,8 @@
define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: vredor.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1920,9 +1813,8 @@
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: vredor.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1935,9 +1827,8 @@
define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv2i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: vredxor.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1948,9 +1839,8 @@
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
; RV64-NEXT: vredxor.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1963,9 +1853,8 @@
define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_add_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, zero
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vredsum.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -1976,9 +1865,8 @@
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
; RV64-NEXT: vredsum.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1989,10 +1877,10 @@
define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; RV32-LABEL: vwreduce_add_nxv4i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vwredsum.vs v8, v8, v10
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vwredsum.vs v8, v8, v12
; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2003,10 +1891,10 @@
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vwredsum.vs v8, v8, v10
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vwredsum.vs v8, v8, v12
; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2018,10 +1906,10 @@
define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
; RV32-LABEL: vwreduce_uadd_nxv4i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v10, zero
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV32-NEXT: vwredsumu.vs v8, v8, v10
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vwredsumu.vs v8, v8, v12
; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2032,10 +1920,10 @@
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; RV64-NEXT: vwredsumu.vs v8, v8, v10
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vwredsumu.vs v8, v8, v12
; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2049,9 +1937,8 @@
define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_umax_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, zero
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vredmaxu.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2062,9 +1949,8 @@
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
; RV64-NEXT: vredmaxu.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2083,9 +1969,8 @@
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: sw zero, 8(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vredmax.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2099,9 +1984,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vredmax.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2114,9 +1998,8 @@
define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_umin_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: vredminu.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2127,9 +2010,8 @@
;
; RV64-LABEL: vreduce_umin_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vredminu.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2150,9 +2032,8 @@
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2166,9 +2047,8 @@
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vredmin.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2181,9 +2061,8 @@
define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_and_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, -1
; RV32-NEXT: vredand.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2194,9 +2073,8 @@
;
; RV64-LABEL: vreduce_and_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, -1
; RV64-NEXT: vredand.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2209,9 +2087,8 @@
define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_or_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, zero
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vredor.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2222,9 +2099,8 @@
;
; RV64-LABEL: vreduce_or_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
; RV64-NEXT: vredor.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2237,9 +2113,8 @@
define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
; RV32-LABEL: vreduce_xor_nxv4i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v12, zero
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vredxor.vs v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
@@ -2250,9 +2125,8 @@
;
; RV64-LABEL: vreduce_xor_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
; RV64-NEXT: vredxor.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret