diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5721,8 +5721,13 @@ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); SDValue IdentitySplat = - lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT), - M1VT, DL, DAG, Subtarget); + ElementCount::isKnownLE(ContainerVT.getVectorElementCount(), + M1VT.getVectorElementCount()) + ? lowerScalarSplat(SDValue(), NeutralElem, VL, ContainerVT, DL, DAG, + Subtarget) + : lowerScalarSplat(SDValue(), NeutralElem, + DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, + Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, IdentitySplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, @@ -5777,14 +5782,19 @@ VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget); } - MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); + MVT M1VT = getLMUL1VT(ContainerVT); MVT XLenVT = Subtarget.getXLenVT(); auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); SDValue ScalarSplat = - lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT), - M1VT, DL, DAG, Subtarget); + ElementCount::isKnownLE(ContainerVT.getVectorElementCount(), + M1VT.getVectorElementCount()) + ? lowerScalarSplat(SDValue(), ScalarVal, VL, ContainerVT, DL, DAG, + Subtarget) + : lowerScalarSplat(SDValue(), ScalarVal, + DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, + Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), VectorVal, ScalarSplat, Mask, VL); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, @@ -5850,11 +5860,16 @@ MVT XLenVT = Subtarget.getXLenVT(); MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT; - SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0), - DAG.getConstant(1, DL, XLenVT), M1VT, - DL, DAG, Subtarget); - SDValue Reduction = - DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL); + SDValue StartSplat = + ElementCount::isKnownLE(ContainerVT.getVectorElementCount(), + M1VT.getVectorElementCount()) + ? 
lowerScalarSplat(SDValue(), Op.getOperand(0), VL, ContainerVT, DL, + DAG, Subtarget) + : lowerScalarSplat(SDValue(), Op.getOperand(0), + DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, + Subtarget); + SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, + StartSplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, DAG.getConstant(0, DL, XLenVT)); if (!VecVT.isInteger()) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -9,11 +9,10 @@ define half @vpreduce_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl) ret half %r @@ -22,11 +21,10 @@ define half @vpreduce_ord_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl) ret half %r @@ -37,11 +35,10 @@ define half @vpreduce_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl) ret half %r @@ -50,11 +47,10 @@ define half @vpreduce_ord_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl) ret half %r @@ -65,11 +61,10 @@ define float @vpreduce_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; 
CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl) ret float %r @@ -78,11 +73,10 @@ define float @vpreduce_ord_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl) ret float %r @@ -93,11 +87,10 @@ define float @vpreduce_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl) ret float %r @@ -106,11 +99,10 @@ define float @vpreduce_ord_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl) ret float %r @@ -131,16 +123,16 @@ ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v25, v0.t +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: addi a1, a0, -32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -162,16 +154,16 @@ ; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vfredosum.vs 
v8, v8, v25, v0.t +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: addi a1, a0, -32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -185,11 +177,10 @@ define double @vpreduce_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl) ret double %r @@ -198,11 +189,10 @@ define double @vpreduce_ord_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl) ret double %r @@ -215,9 +205,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl) ret double %r @@ -228,9 +218,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl) ret double %r @@ -243,9 +233,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl) ret double %r @@ -256,9 +246,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s 
fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl) ret double %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) @@ -38,9 +38,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <2 x half>, <2 x half>* %x %red = call reassoc half @llvm.vector.reduce.fadd.v2f16(half %s, <2 x half> %v) @@ -52,7 +55,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -68,9 +71,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x %red = call reassoc half @llvm.vector.reduce.fadd.v4f16(half %s, <4 x half> %v) @@ -82,7 +88,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -98,9 +104,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <8 x half>, <8 x half>* %x %red = call reassoc half @llvm.vector.reduce.fadd.v8f16(half %s, <8 x half> %v) @@ -112,7 +121,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; 
CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -317,11 +326,10 @@ define float @vreduce_ord_fwadd_v1f32(<1 x half>* %x, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -339,9 +347,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <2 x float>, <2 x float>* %x %red = call reassoc float @llvm.vector.reduce.fadd.v2f32(float %s, <2 x float> %v) @@ -353,7 +364,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -365,14 +376,16 @@ define float @vreduce_fwadd_v2f32(<2 x half>* %x, float %s) { ; CHECK-LABEL: vreduce_fwadd_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <2 x half>, <2 x half>* %x %e = fpext <2 x half> %v to <2 x float> @@ -383,11 +396,10 @@ define float @vreduce_ord_fwadd_v2f32(<2 x half>* %x, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -405,9 +417,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) @@ -419,7 +434,7 @@ ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -433,11 +448,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x %e = fpext <4 x half> %v to <4 x float> @@ -450,7 +468,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -830,14 +848,29 @@ declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) define double @vreduce_fadd_v2f64(<2 x double>* %x, double %s) { -; CHECK-LABEL: vreduce_fadd_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fadd_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vfmv.v.f v9, ft0 +; RV32-NEXT: vfredusum.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fadd_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vfmv.v.f v9, ft0 +; RV64-NEXT: vfredusum.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %v = load <2 x double>, <2 x double>* %x %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %v) ret double %red @@ -848,7 +881,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -858,16 +891,33 @@ } define double @vreduce_fwadd_v2f64(<2 x float>* %x, double %s) { -; CHECK-LABEL: vreduce_fwadd_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwredusum.vs v8, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fwadd_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vfmv.v.f v9, ft0 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwredusum.vs v8, v8, v9 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: 
ret +; +; RV64-LABEL: vreduce_fwadd_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vfmv.v.f v9, ft0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwredusum.vs v8, v8, v9 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %v = load <2 x float>, <2 x float>* %x %e = fpext <2 x float> %v to <2 x double> %red = call reassoc double @llvm.vector.reduce.fadd.v2f64(double %s, <2 x double> %e) @@ -879,7 +929,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1128,43 +1178,6 @@ } define double @vreduce_fwadd_v32f64(<32 x float>* %x, double %s) { -; RV32-LABEL: vreduce_fwadd_v32f64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV32-NEXT: vslidedown.vi v16, v8, 16 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vfwadd.vv v24, v8, v16 -; RV32-NEXT: fcvt.d.w ft0, zero -; RV32-NEXT: fneg.d ft0, ft0 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-NEXT: vfmv.s.f v8, ft0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV32-NEXT: vfredusum.vs v8, v24, v8 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: fadd.d fa0, fa0, ft0 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fwadd_v32f64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu -; RV64-NEXT: vslidedown.vi v16, v8, 16 -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV64-NEXT: vfwadd.vv v24, v8, v16 -; RV64-NEXT: fmv.d.x ft0, zero -; RV64-NEXT: fneg.d ft0, ft0 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-NEXT: vfmv.s.f v8, ft0 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vfredusum.vs v8, v24, v8 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: fadd.d fa0, fa0, ft0 -; RV64-NEXT: ret ; CHECK-LABEL: vreduce_fwadd_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 @@ -1222,9 +1235,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI68_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI68_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1242,9 +1253,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI69_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI69_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1260,9 +1269,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI70_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI70_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1278,9 +1285,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 
%hi(.LCPI71_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI71_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1322,9 +1327,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI73_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1342,9 +1345,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI74_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1360,9 +1361,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI75_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI75_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1378,9 +1377,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI76_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI76_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1428,9 +1425,7 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI78_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI78_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1527,9 +1522,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI83_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI83_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1547,9 +1540,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI84_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI84_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1565,9 +1556,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI85_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI85_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1583,9 +1572,7 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI86_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI86_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1627,9 +1614,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 
%hi(.LCPI88_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1647,9 +1632,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI89_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1665,9 +1648,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI90_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI90_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1683,9 +1664,7 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI91_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI91_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1733,9 +1712,7 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI93_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI93_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1828,9 +1805,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %v = load <4 x float>, <4 x float>* %x %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -9,11 +9,10 @@ define signext i8 @vpreduce_add_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -24,12 +23,10 @@ define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; 
CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -40,11 +37,10 @@ define signext i8 @vpreduce_smax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -55,12 +51,10 @@ define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -71,11 +65,10 @@ define signext i8 @vpreduce_smin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -86,11 +79,10 @@ define signext i8 @vpreduce_and_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -101,11 +93,10 @@ define signext i8 @vpreduce_or_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -116,11 +107,10 @@ define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v2i8: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl) ret i8 %r @@ -131,12 +121,10 @@ define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v3i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl) ret i8 %r @@ -147,11 +135,10 @@ define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -162,12 +149,10 @@ define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -178,11 +163,10 @@ define signext i8 @vpreduce_smax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -193,12 +177,10 @@ define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 
+; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -209,11 +191,10 @@ define signext i8 @vpreduce_smin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -224,11 +205,10 @@ define signext i8 @vpreduce_and_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -239,11 +219,10 @@ define signext i8 @vpreduce_or_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -254,11 +233,10 @@ define signext i8 @vpreduce_xor_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl) ret i8 %r @@ -269,11 +247,10 @@ define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -282,27 +259,13 @@ declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32) define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { 
-; RV32-LABEL: vpreduce_umax_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r } @@ -312,11 +275,10 @@ define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -325,27 +287,13 @@ declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32) define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v2i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v2i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r } @@ -355,11 +303,10 @@ define signext i16 @vpreduce_smin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -370,11 +317,10 @@ define signext i16 @vpreduce_and_v2i16(i16 signext %s, <2 
x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -385,11 +331,10 @@ define signext i16 @vpreduce_or_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -400,11 +345,10 @@ define signext i16 @vpreduce_xor_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl) ret i16 %r @@ -415,11 +359,10 @@ define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -428,27 +371,13 @@ declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32) define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: 
vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r } @@ -458,11 +387,10 @@ define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -471,27 +399,13 @@ declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32) define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v4i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v4i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r } @@ -501,11 +415,10 @@ define signext i16 @vpreduce_smin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -516,11 +429,10 @@ define signext i16 @vpreduce_and_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -531,11 +443,10 @@ define signext i16 @vpreduce_or_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, 
mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -546,11 +457,10 @@ define signext i16 @vpreduce_xor_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl) ret i16 %r @@ -561,11 +471,10 @@ define signext i32 @vpreduce_add_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -574,25 +483,13 @@ declare i32 @llvm.vp.reduce.umax.v2i32(i32, <2 x i32>, <2 x i1>, i32) define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r } @@ -602,11 +499,10 @@ define signext i32 @vpreduce_smax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -615,25 +511,13 @@ declare i32 @llvm.vp.reduce.umin.v2i32(i32, <2 x i32>, <2 x i1>, i32) define signext i32 @vpreduce_umin_v2i32(i32 
signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v2i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r } @@ -643,11 +527,10 @@ define signext i32 @vpreduce_smin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -658,11 +541,10 @@ define signext i32 @vpreduce_and_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -673,11 +555,10 @@ define signext i32 @vpreduce_or_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl) ret i32 %r @@ -688,11 +569,10 @@ define signext i32 @vpreduce_xor_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 
%evl) ret i32 %r @@ -703,11 +583,10 @@ define signext i32 @vpreduce_add_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -716,25 +595,13 @@ declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32) define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r } @@ -744,11 +611,10 @@ define signext i32 @vpreduce_smax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -757,25 +623,13 @@ declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32) define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_v4i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_v4i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r } 
@@ -785,11 +639,10 @@ define signext i32 @vpreduce_smin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -800,11 +653,10 @@ define signext i32 @vpreduce_and_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -815,11 +667,10 @@ define signext i32 @vpreduce_or_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -830,11 +681,10 @@ define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) ret i32 %r @@ -855,16 +705,16 @@ ; CHECK-NEXT: .LBB49_2: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v25, a0 -; CHECK-NEXT: vsetvli zero, a2, e32, m8, tu, ma -; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t -; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vredxor.vs v8, v8, v25, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: addi a0, a1, -32 ; CHECK-NEXT: sltu a1, a1, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vredxor.vs v8, v16, v8, v0.t ; CHECK-NEXT: vmv.x.s a0, v8 @@ -883,25 +733,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: 
vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredsum.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_add_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredsum.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -917,25 +765,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -951,25 +797,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmax.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smax_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmax.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -985,25 +829,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, 
v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredminu.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umin_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredminu.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -1019,25 +861,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmin.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smin_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmin.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -1053,25 +893,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredand.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredand.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_and_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredand.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredand.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -1087,25 +925,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredor.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredor.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 
; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_or_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredor.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredor.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -1121,25 +957,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredxor.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_xor_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredxor.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl) ret i64 %r @@ -1157,12 +991,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredsum.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1171,9 +1005,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredsum.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1191,12 +1025,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1205,9 +1039,9 @@ ; RV64: # %bb.0: ; 
RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1225,12 +1059,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredmax.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1239,9 +1073,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredmax.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1259,12 +1093,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1273,9 +1107,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1293,12 +1127,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredmin.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1307,9 +1141,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredmin.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 
@llvm.vp.reduce.smin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1327,12 +1161,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredand.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredand.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1341,9 +1175,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredand.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredand.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1361,12 +1195,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredor.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredor.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1375,9 +1209,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredor.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredor.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r @@ -1395,12 +1229,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredxor.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1409,9 +1243,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredxor.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl) ret i64 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -23,7 +23,7 @@ ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -39,7 +39,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -55,7 +55,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -71,7 +71,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -208,7 +208,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -220,11 +220,10 @@ define i16 @vwreduce_add_v2i16(<2 x i8>* %x) { ; CHECK-LABEL: vwreduce_add_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -238,11 +237,10 @@ define i16 @vwreduce_uadd_v2i16(<2 x i8>* %x) { ; CHECK-LABEL: vwreduce_uadd_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -260,7 +258,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -272,11 +270,10 @@ define i16 @vwreduce_add_v4i16(<4 x i8>* %x) { ; CHECK-LABEL: vwreduce_add_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -290,11 +287,10 @@ define i16 @vwreduce_uadd_v4i16(<4 x i8>* %x) { ; CHECK-LABEL: vwreduce_uadd_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, 
ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -312,7 +308,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -326,7 +322,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -343,7 +339,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -638,7 +634,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -650,11 +646,10 @@ define i32 @vwreduce_add_v2i32(<2 x i16>* %x) { ; CHECK-LABEL: vwreduce_add_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -668,11 +663,10 @@ define i32 @vwreduce_uadd_v2i32(<2 x i16>* %x) { ; CHECK-LABEL: vwreduce_uadd_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -690,7 +684,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -704,7 +698,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -721,7 +715,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -1043,7 +1037,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 
2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredsum.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1056,7 +1050,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredsum.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1070,7 +1064,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1085,7 +1079,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1102,7 +1096,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1117,7 +1111,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -1883,9 +1877,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1901,9 +1893,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1919,9 +1909,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1937,9 +1925,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2048,9 +2034,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2066,9 +2050,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli 
zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2084,9 +2066,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2194,9 +2174,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2212,9 +2190,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2331,9 +2307,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredand.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -2346,9 +2320,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredand.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -2567,7 +2539,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2583,7 +2555,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2599,7 +2571,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2615,7 +2587,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2724,7 +2696,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2740,7 +2712,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2756,7 +2728,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, 
v8 ; CHECK-NEXT: ret @@ -2862,7 +2834,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2878,7 +2850,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -2991,7 +2963,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -3004,7 +2976,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3203,7 +3175,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3219,7 +3191,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3235,7 +3207,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3251,7 +3223,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3360,7 +3332,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3376,7 +3348,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3392,7 +3364,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3498,7 +3470,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3514,7 +3486,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3627,7 +3599,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; 
RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -3640,7 +3612,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3840,7 +3812,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3857,7 +3829,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3874,7 +3846,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -3891,7 +3863,7 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4006,7 +3978,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4017,7 +3989,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4035,7 +4007,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4046,7 +4018,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4064,7 +4036,7 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4075,7 +4047,7 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4247,7 +4219,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4258,7 +4230,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4276,7 +4248,7 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v9, 
a0 +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -4287,7 +4259,7 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4469,9 +4441,7 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -4487,7 +4457,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4751,7 +4721,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4768,7 +4738,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4785,7 +4755,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4802,7 +4772,7 @@ ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4916,7 +4886,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4933,7 +4903,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -4950,7 +4920,7 @@ ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5061,7 +5031,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5078,7 +5048,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5201,9 +5171,7 @@ ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, 
(a0), zero -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5219,7 +5187,7 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmax.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5472,9 +5440,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5490,9 +5456,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5508,9 +5472,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5526,9 +5488,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5637,9 +5597,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5655,9 +5613,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5673,9 +5629,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5783,9 +5737,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5801,9 +5753,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, -1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -5920,9 +5870,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, -1 -; 
RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vredminu.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -5935,9 +5883,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, -1 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vredminu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6156,7 +6102,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6172,7 +6118,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6188,7 +6134,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6204,7 +6150,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6313,7 +6259,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6329,7 +6275,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6345,7 +6291,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6451,7 +6397,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6467,7 +6413,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -6580,7 +6526,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmv.s.x v9, zero +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -6593,7 +6539,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v9, zero +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll --- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -245,11 +245,11 @@ define float @reduce_fadd(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret entry: %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v) @@ -259,11 +259,11 @@ define float @reduce_fadd2(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fadd2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, ft0, fa0 ; CHECK-NEXT: ret entry: %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v) @@ -274,11 +274,13 @@ define float @reduce_fmax(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fmax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI18_0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredmax.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fmax.s fa0, fa0, ft0 ; CHECK-NEXT: ret entry: %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) @@ -289,11 +291,13 @@ define float @reduce_fmin(float %x, <4 x float> %v) { ; CHECK-LABEL: reduce_fmin: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vlse32.v v9, (a0), zero ; CHECK-NEXT: vfredmin.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fmin.s fa0, fa0, ft0 ; CHECK-NEXT: ret entry: %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -1,19 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare half @llvm.vector.reduce.fadd.nxv1f16(half, ) define half @vreduce_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, 
v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv1f16(half %s, %v) ret half %red @@ -22,9 +24,8 @@ define half @vreduce_ord_fadd_nxv1f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -37,11 +38,13 @@ define half @vreduce_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) ret half %red @@ -50,9 +53,8 @@ define half @vreduce_ord_fadd_nxv2f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -65,11 +67,13 @@ define half @vreduce_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: vreduce_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.h.x ft0, zero +; CHECK-NEXT: fneg.h ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv4f16(half %s, %v) ret half %red @@ -78,9 +82,8 @@ define half @vreduce_ord_fadd_nxv4f16( %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -93,11 +96,13 @@ define float @vreduce_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) ret float %red @@ -106,9 +111,8 @@ define float @vreduce_ord_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -119,12 +123,15 @@ define float @vreduce_fwadd_nxv1f32( %v, float %s) { ; 
CHECK-LABEL: vreduce_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %e = fpext %v to %red = call reassoc float @llvm.vector.reduce.fadd.nxv1f32(float %s, %e) @@ -134,9 +141,9 @@ define float @vreduce_ord_fwadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -151,11 +158,13 @@ define float @vreduce_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %v) ret float %red @@ -164,9 +173,8 @@ define float @vreduce_ord_fadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -177,12 +185,15 @@ define float @vreduce_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fneg.s ft0, ft0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, ft0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredusum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %e = fpext %v to %red = call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float %s, %e) @@ -192,9 +203,9 @@ define float @vreduce_ord_fwadd_nxv2f32( %v, float %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -265,14 +276,27 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, ) define double 
@vreduce_fadd_nxv1f64( %v, double %s) { -; CHECK-LABEL: vreduce_fadd_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fadd_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v9, ft0 +; RV32-NEXT: vfredusum.vs v8, v8, v9 +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fadd_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v9, ft0 +; RV64-NEXT: vfredusum.vs v8, v8, v9 +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %v) ret double %red } @@ -280,9 +304,8 @@ define double @vreduce_ord_fadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -291,15 +314,31 @@ } define double @vreduce_fwadd_nxv1f64( %v, double %s) { -; CHECK-LABEL: vreduce_fwadd_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfwredusum.vs v8, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: ret +; RV32-LABEL: vreduce_fwadd_nxv1f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w ft0, zero +; RV32-NEXT: fneg.d ft0, ft0 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v9, ft0 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vfwredusum.vs v8, v8, v9 +; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fadd.d fa0, fa0, ft0 +; RV32-NEXT: ret +; +; RV64-LABEL: vreduce_fwadd_nxv1f64: +; RV64: # %bb.0: +; RV64-NEXT: fmv.d.x ft0, zero +; RV64-NEXT: fneg.d ft0, ft0 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v9, ft0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vfwredusum.vs v8, v8, v9 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fadd.d fa0, fa0, ft0 +; RV64-NEXT: ret %e = fpext %v to %red = call reassoc double @llvm.vector.reduce.fadd.nxv1f64(double %s, %e) ret double %red @@ -308,9 +347,9 @@ define double @vreduce_ord_fwadd_nxv1f64( %v, double %s) { ; CHECK-LABEL: vreduce_ord_fwadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -443,9 +482,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI30_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: 
vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -458,9 +496,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI31_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -473,9 +510,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI32_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -490,9 +526,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI33_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -507,9 +542,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI34_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -543,9 +577,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI36_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI36_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -558,9 +591,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI37_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI37_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -573,9 +605,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI38_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI38_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -590,9 +621,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI39_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -643,9 +673,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI42_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI42_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli 
a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -658,9 +687,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI43_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI43_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -673,9 +701,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI44_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -743,9 +770,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI48_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI48_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -758,9 +784,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI49_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI49_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -773,9 +798,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI50_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI50_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -790,9 +814,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI51_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI51_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -807,9 +830,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI52_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI52_0) -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -843,9 +865,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI54_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI54_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -858,9 +879,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI55_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI55_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: 
vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -873,9 +893,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI56_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI56_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -890,9 +909,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI57_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI57_0) -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -943,9 +961,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI60_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI60_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -958,9 +975,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI61_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI61_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -973,9 +989,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI62_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI62_0) -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v9, (a0), zero -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1039,11 +1054,11 @@ define float @vreduce_nsz_fadd_nxv1f32( %v, float %s) { ; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) ret float %red @@ -1066,9 +1081,8 @@ ; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1162,11 +1176,10 @@ ; CHECK-NEXT: vfmv.v.f v9, ft0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vx v8, v9, a1 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v8, v9 -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret %red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16(half %s, %v) ret half %red diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -9,11 +9,10 @@ define half @vpreduce_fadd_nxv1f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, %m, i32 %evl) ret half %r @@ -22,11 +21,10 @@ define half @vpreduce_ord_fadd_nxv1f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, %v, %m, i32 %evl) ret half %r @@ -37,11 +35,10 @@ define half @vpreduce_fadd_nxv2f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, %m, i32 %evl) ret half %r @@ -50,11 +47,10 @@ define half @vpreduce_ord_fadd_nxv2f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, %v, %m, i32 %evl) ret half %r @@ -65,11 +61,10 @@ define half @vpreduce_fadd_nxv4f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, %m, i32 %evl) ret half %r @@ -78,11 +73,10 @@ define half @vpreduce_ord_fadd_nxv4f16(half %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e16, 
m1, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, %v, %m, i32 %evl) ret half %r @@ -108,12 +102,12 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v25, v0.t +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -140,12 +134,12 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v25, v0.t +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -159,11 +153,10 @@ define float @vpreduce_fadd_nxv1f32(float %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, %m, i32 %evl) ret float %r @@ -172,11 +165,10 @@ define float @vpreduce_ord_fadd_nxv1f32(float %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, %v, %m, i32 %evl) ret float %r @@ -187,11 +179,10 @@ define float @vpreduce_fadd_nxv2f32(float %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, %m, i32 %evl) ret float %r @@ -200,11 +191,10 @@ define float 
@vpreduce_ord_fadd_nxv2f32(float %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, %v, %m, i32 %evl) ret float %r @@ -217,9 +207,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, %m, i32 %evl) ret float %r @@ -230,9 +220,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, %v, %m, i32 %evl) ret float %r @@ -243,11 +233,10 @@ define double @vpreduce_fadd_nxv1f64(double %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma -; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, %m, i32 %evl) ret double %r @@ -256,11 +245,10 @@ define double @vpreduce_ord_fadd_nxv1f64(double %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma -; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, %v, %m, i32 %evl) ret double %r @@ -273,9 +261,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, %m, i32 %evl) ret double %r @@ -286,9 +274,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vfmv.f.s 
fa0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, %v, %m, i32 %evl) ret double %r @@ -301,9 +289,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v12, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.nxv3f64(double %s, %v, %m, i32 %evl) ret double %r @@ -314,9 +302,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v12, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.nxv3f64(double %s, %v, %m, i32 %evl) ret double %r @@ -329,9 +317,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v8, v12, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, %m, i32 %evl) ret double %r @@ -342,9 +330,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfredosum.vs v8, v8, v12, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, %v, %m, i32 %evl) ret double %r diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -9,11 +9,10 @@ define signext i8 @vpreduce_add_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -24,12 +23,10 @@ define signext i8 @vpreduce_umax_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; 
CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -40,11 +37,10 @@ define signext i8 @vpreduce_smax_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -55,12 +51,10 @@ define signext i8 @vpreduce_umin_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -71,11 +65,10 @@ define signext i8 @vpreduce_smin_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -86,11 +79,10 @@ define signext i8 @vpreduce_and_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -101,11 +93,10 @@ define signext i8 @vpreduce_or_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -116,11 +107,10 @@ define signext i8 @vpreduce_xor_nxv1i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; 
CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -131,11 +121,10 @@ define signext i8 @vpreduce_add_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -146,12 +135,10 @@ define signext i8 @vpreduce_umax_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -162,11 +149,10 @@ define signext i8 @vpreduce_smax_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -177,12 +163,10 @@ define signext i8 @vpreduce_umin_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -193,11 +177,10 @@ define signext i8 @vpreduce_smin_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -208,11 +191,10 @@ define signext i8 @vpreduce_and_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, 
a1, e8, mf4, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -223,11 +205,10 @@ define signext i8 @vpreduce_or_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -238,11 +219,10 @@ define signext i8 @vpreduce_xor_nxv2i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -253,11 +233,10 @@ define signext i8 @vpreduce_smax_nxv3i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv3i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.nxv3i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -268,11 +247,10 @@ define signext i8 @vpreduce_add_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.add.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -283,12 +261,10 @@ define signext i8 @vpreduce_umax_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -299,11 +275,10 @@ define signext i8 @vpreduce_smax_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv4i8: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -314,12 +289,10 @@ define signext i8 @vpreduce_umin_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umin_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a0, a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -330,11 +303,10 @@ define signext i8 @vpreduce_smin_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -345,11 +317,10 @@ define signext i8 @vpreduce_and_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.and.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -360,11 +331,10 @@ define signext i8 @vpreduce_or_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.or.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -375,11 +345,10 @@ define signext i8 @vpreduce_xor_nxv4i8(i8 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 %s, %v, %m, i32 %evl) ret i8 %r @@ -390,11 +359,10 @@ define signext i16 
@vpreduce_add_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -403,27 +371,13 @@ declare i16 @llvm.vp.reduce.umax.nxv1i16(i16, , , i32) define signext i16 @vpreduce_umax_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r } @@ -433,11 +387,10 @@ define signext i16 @vpreduce_smax_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -446,27 +399,13 @@ declare i16 @llvm.vp.reduce.umin.nxv1i16(i16, , , i32) define signext i16 @vpreduce_umin_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_nxv1i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_nxv1i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r } @@ -476,11 +415,10 @@ define signext i16 @vpreduce_smin_nxv1i16(i16 signext 
%s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -491,11 +429,10 @@ define signext i16 @vpreduce_and_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -506,11 +443,10 @@ define signext i16 @vpreduce_or_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -521,11 +457,10 @@ define signext i16 @vpreduce_xor_nxv1i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -536,11 +471,10 @@ define signext i16 @vpreduce_add_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -549,27 +483,13 @@ declare i16 @llvm.vp.reduce.umax.nxv2i16(i16, , , i32) define signext i16 @vpreduce_umax_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv2i16: -; RV64: # %bb.0: -; 
RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r } @@ -579,11 +499,10 @@ define signext i16 @vpreduce_smax_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -592,27 +511,13 @@ declare i16 @llvm.vp.reduce.umin.nxv2i16(i16, , , i32) define signext i16 @vpreduce_umin_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_nxv2i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_nxv2i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r } @@ -622,11 +527,10 @@ define signext i16 @vpreduce_smin_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -637,11 +541,10 @@ define signext i16 @vpreduce_and_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.nxv2i16(i16 %s, %v, 
%m, i32 %evl) ret i16 %r @@ -652,11 +555,10 @@ define signext i16 @vpreduce_or_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -667,11 +569,10 @@ define signext i16 @vpreduce_xor_nxv2i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -682,11 +583,10 @@ define signext i16 @vpreduce_add_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.add.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -695,27 +595,13 @@ declare i16 @llvm.vp.reduce.umax.nxv4i16(i16, , , i32) define signext i16 @vpreduce_umax_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r } @@ -725,11 +611,10 @@ define signext i16 @vpreduce_smax_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 
@llvm.vp.reduce.smax.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -738,27 +623,13 @@ declare i16 @llvm.vp.reduce.umin.nxv4i16(i16, , , i32) define signext i16 @vpreduce_umin_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_nxv4i16: -; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_nxv4i16: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r } @@ -768,11 +639,10 @@ define signext i16 @vpreduce_smin_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -783,11 +653,10 @@ define signext i16 @vpreduce_and_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.and.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -798,11 +667,10 @@ define signext i16 @vpreduce_or_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.or.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -813,11 +681,10 @@ define signext i16 @vpreduce_xor_nxv4i16(i16 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 
; CHECK-NEXT: ret %r = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 %s, %v, %m, i32 %evl) ret i16 %r @@ -828,11 +695,10 @@ define signext i32 @vpreduce_add_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -841,25 +707,13 @@ declare i32 @llvm.vp.reduce.umax.nxv1i32(i32, , , i32) define signext i32 @vpreduce_umax_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r } @@ -869,11 +723,10 @@ define signext i32 @vpreduce_smax_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -882,25 +735,13 @@ declare i32 @llvm.vp.reduce.umin.nxv1i32(i32, , , i32) define signext i32 @vpreduce_umin_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_nxv1i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_nxv1i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r } @@ -910,11 +751,10 @@ define signext i32 
@vpreduce_smin_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -925,11 +765,10 @@ define signext i32 @vpreduce_and_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -940,11 +779,10 @@ define signext i32 @vpreduce_or_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -955,11 +793,10 @@ define signext i32 @vpreduce_xor_nxv1i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -970,11 +807,10 @@ define signext i32 @vpreduce_add_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -983,25 +819,13 @@ declare i32 @llvm.vp.reduce.umax.nxv2i32(i32, , , i32) define signext i32 @vpreduce_umax_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umax_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umax_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; 
RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umax_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r } @@ -1011,11 +835,10 @@ define signext i32 @vpreduce_smax_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smax_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1024,25 +847,13 @@ declare i32 @llvm.vp.reduce.umin.nxv2i32(i32, , , i32) define signext i32 @vpreduce_umin_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { -; RV32-LABEL: vpreduce_umin_nxv2i32: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vpreduce_umin_nxv2i32: -; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vpreduce_umin_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredminu.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r } @@ -1052,11 +863,10 @@ define signext i32 @vpreduce_smin_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1067,11 +877,10 @@ define signext i32 @vpreduce_and_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_and_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredand.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1082,11 +891,10 @@ define signext i32 @vpreduce_or_nxv2i32(i32 signext 
%s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1097,11 +905,10 @@ define signext i32 @vpreduce_xor_nxv2i32(i32 signext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vredxor.vs v8, v8, v9, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1114,9 +921,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vredsum.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vredsum.vs v8, v8, v10, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.add.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1129,9 +936,9 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; RV32-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_nxv4i32: @@ -1140,9 +947,9 @@ ; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; RV64-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1168,12 +975,12 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB67_2: -; RV32-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vredmaxu.vs v8, v8, v25, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v8, a0 -; RV32-NEXT: vsetvli zero, a2, e32, m8, tu, ma +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vredmaxu.vs v8, v16, v8, v0.t ; RV32-NEXT: vmv.x.s a0, v8 @@ -1198,12 +1005,12 @@ ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v25, a2 -; RV64-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV64-NEXT: vmv.x.s a1, v25 +; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-NEXT: vredmaxu.vs v8, v8, v25, v0.t +; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, a1 -; RV64-NEXT: 
vsetvli zero, a0, e32, m8, tu, ma +; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vredmaxu.vs v8, v16, v8, v0.t ; RV64-NEXT: vmv.x.s a0, v8 @@ -1219,9 +1026,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vredmax.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vredmax.vs v8, v8, v10, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1234,9 +1041,9 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v10, a0 -; RV32-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; RV32-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umin_nxv4i32: @@ -1245,9 +1052,9 @@ ; RV64-NEXT: srli a0, a0, 32 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; RV64-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1260,9 +1067,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vredmin.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v10, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1275,9 +1082,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vredand.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vredand.vs v8, v8, v10, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.and.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1290,9 +1097,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vredor.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vredor.vs v8, v8, v10, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.or.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1305,9 +1112,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vredxor.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vredxor.vs v8, v8, v10, v0.t +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 %s, %v, %m, i32 %evl) ret i32 %r @@ -1323,25 +1130,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli 
zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredsum.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_add_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredsum.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1355,27 +1160,26 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, mf2, tu, ma -; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV32-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpwreduce_add_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, %e, %m, i32 %evl) @@ -1390,27 +1194,26 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, mf2, tu, ma -; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV32-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpwreduce_uadd_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, mf2, tu, ma -; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, %e, %m, i32 %evl) @@ 
-1427,25 +1230,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmaxu.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1461,25 +1262,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmax.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smax_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmax.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1495,25 +1294,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredminu.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umin_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredminu.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1529,25 +1326,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, 
m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredmin.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_smin_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredmin.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1563,25 +1358,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredand.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredand.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_and_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredand.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredand.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1597,25 +1390,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: vredor.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredor.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_or_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredor.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredor.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1631,25 +1422,23 @@ ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma -; RV32-NEXT: 
vredxor.vs v9, v8, v9, v0.t -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vredxor.vs v8, v8, v9, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_xor_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma -; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vredxor.vs v8, v8, v9, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1667,12 +1456,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredsum.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1681,9 +1470,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredsum.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1699,13 +1488,13 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma -; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1714,10 +1503,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, %e, %m, i32 %evl) @@ -1734,13 +1523,13 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, ma -; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v9 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v9, a1 +; RV32-NEXT: 
vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1749,10 +1538,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m1, tu, ma -; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t +; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v9, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v9 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, %e, %m, i32 %evl) @@ -1771,12 +1560,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1785,9 +1574,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredmaxu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1805,12 +1594,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredmax.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1819,9 +1608,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredmax.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1839,12 +1628,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1853,9 +1642,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, 
e64, m2, ta, ma +; RV64-NEXT: vredminu.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1873,12 +1662,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredmin.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1887,9 +1676,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredmin.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1907,12 +1696,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredand.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredand.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1921,9 +1710,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredand.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredand.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1941,12 +1730,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredor.vs v10, v8, v10, v0.t -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredor.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1955,9 +1744,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredor.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredor.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -1975,12 +1764,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma -; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t -; RV32-NEXT: 
vmv.x.s a0, v10 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vredxor.vs v8, v8, v10, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1989,9 +1778,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vredxor.vs v8, v8, v10, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2009,12 +1798,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredsum.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredsum.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2023,9 +1812,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredsum.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredsum.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2041,13 +1830,13 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma -; RV32-NEXT: vwredsum.vs v10, v8, v10, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; RV32-NEXT: vwredsum.vs v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2056,10 +1845,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV64-NEXT: vwredsum.vs v10, v8, v10, v0.t +; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; RV64-NEXT: vwredsum.vs v8, v8, v10, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = sext %v to %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, %e, %m, i32 %evl) @@ -2076,13 +1865,13 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma -; RV32-NEXT: vwredsumu.vs v10, v8, v10, v0.t +; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; RV32-NEXT: vwredsumu.vs v8, v8, v10, v0.t ; RV32-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v10, a1 +; RV32-NEXT: vsrl.vx v8, v8, 
a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2091,10 +1880,10 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; RV64-NEXT: vwredsumu.vs v10, v8, v10, v0.t +; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; RV64-NEXT: vwredsumu.vs v8, v8, v10, v0.t ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %e = zext %v to %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, %e, %m, i32 %evl) @@ -2113,12 +1902,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredmaxu.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredmaxu.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2127,9 +1916,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredmaxu.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredmaxu.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2147,12 +1936,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredmax.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredmax.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2161,9 +1950,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredmax.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredmax.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2181,12 +1970,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredminu.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredminu.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2195,9 +1984,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredminu.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, 
ma +; RV64-NEXT: vredminu.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2215,12 +2004,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredmin.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredmin.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2229,9 +2018,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredmin.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredmin.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2249,12 +2038,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredand.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredand.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2263,9 +2052,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredand.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredand.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.and.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2283,12 +2072,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredor.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredor.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2297,9 +2086,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredor.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredor.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.or.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r @@ -2317,12 +2106,12 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma -; RV32-NEXT: vredxor.vs v12, v8, v12, v0.t -; RV32-NEXT: vmv.x.s a0, 
v12 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vredxor.vs v8, v8, v12, v0.t +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v8, v12, a1 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2331,9 +2120,9 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, a0 -; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; RV64-NEXT: vredxor.vs v12, v8, v12, v0.t -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vredxor.vs v8, v8, v12, v0.t +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %r = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 %s, %v, %m, i32 %evl) ret i64 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll @@ -9,9 +9,8 @@ define signext i8 @vreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -24,9 +23,8 @@ define signext i8 @vreduce_umax_nxv1i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -40,9 +38,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -55,9 +52,8 @@ define signext i8 @vreduce_umin_nxv1i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -71,9 +67,8 @@ ; CHECK-LABEL: vreduce_smin_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -86,9 +81,8 @@ define signext i8 @vreduce_and_nxv1i8( %v) { ; CHECK-LABEL: vreduce_and_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -101,9 +95,8 @@ define signext i8 @vreduce_or_nxv1i8( %v) { ; CHECK-LABEL: vreduce_or_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: 
ret @@ -116,9 +109,8 @@ define signext i8 @vreduce_xor_nxv1i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -131,9 +123,8 @@ define signext i8 @vreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -146,9 +137,8 @@ define signext i8 @vreduce_umax_nxv2i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -162,9 +152,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -177,9 +166,8 @@ define signext i8 @vreduce_umin_nxv2i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -193,9 +181,8 @@ ; CHECK-LABEL: vreduce_smin_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -208,9 +195,8 @@ define signext i8 @vreduce_and_nxv2i8( %v) { ; CHECK-LABEL: vreduce_and_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -223,9 +209,8 @@ define signext i8 @vreduce_or_nxv2i8( %v) { ; CHECK-LABEL: vreduce_or_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -238,9 +223,8 @@ define signext i8 @vreduce_xor_nxv2i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -253,9 +237,8 @@ define signext i8 @vreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: 
vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -268,9 +251,8 @@ define signext i8 @vreduce_umax_nxv4i8( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -284,9 +266,8 @@ ; CHECK-LABEL: vreduce_smax_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -299,9 +280,8 @@ define signext i8 @vreduce_umin_nxv4i8( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -315,9 +295,8 @@ ; CHECK-LABEL: vreduce_smin_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmin.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -330,9 +309,8 @@ define signext i8 @vreduce_and_nxv4i8( %v) { ; CHECK-LABEL: vreduce_and_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -345,9 +323,8 @@ define signext i8 @vreduce_or_nxv4i8( %v) { ; CHECK-LABEL: vreduce_or_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -360,9 +337,8 @@ define signext i8 @vreduce_xor_nxv4i8( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -375,9 +351,8 @@ define signext i16 @vreduce_add_nxv1i16( %v) { ; CHECK-LABEL: vreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -388,9 +363,9 @@ define signext i16 @vwreduce_add_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -403,9 +378,9 @@ define 
signext i16 @vwreduce_uadd_nxv1i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -420,9 +395,8 @@ define signext i16 @vreduce_umax_nxv1i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -436,9 +410,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -451,9 +424,8 @@ define signext i16 @vreduce_umin_nxv1i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -468,9 +440,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -479,9 +450,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -494,9 +464,8 @@ define signext i16 @vreduce_and_nxv1i16( %v) { ; CHECK-LABEL: vreduce_and_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -509,9 +478,8 @@ define signext i16 @vreduce_or_nxv1i16( %v) { ; CHECK-LABEL: vreduce_or_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -524,9 +492,8 @@ define signext i16 @vreduce_xor_nxv1i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -539,9 +506,8 @@ define signext i16 @vreduce_add_nxv2i16( %v) { ; CHECK-LABEL: vreduce_add_nxv2i16: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -552,9 +518,9 @@ define signext i16 @vwreduce_add_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -567,9 +533,9 @@ define signext i16 @vwreduce_uadd_nxv2i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -584,9 +550,8 @@ define signext i16 @vreduce_umax_nxv2i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -600,9 +565,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -615,9 +579,8 @@ define signext i16 @vreduce_umin_nxv2i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -632,9 +595,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -643,9 +605,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -658,9 +619,8 @@ define signext i16 @vreduce_and_nxv2i16( %v) { ; CHECK-LABEL: vreduce_and_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -673,9 +633,8 @@ define signext i16 @vreduce_or_nxv2i16( %v) { ; CHECK-LABEL: 
vreduce_or_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -688,9 +647,8 @@ define signext i16 @vreduce_xor_nxv2i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -703,9 +661,8 @@ define signext i16 @vreduce_add_nxv4i16( %v) { ; CHECK-LABEL: vreduce_add_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -716,9 +673,9 @@ define signext i16 @vwreduce_add_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_add_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -731,9 +688,9 @@ define signext i16 @vwreduce_uadd_nxv4i8( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -748,9 +705,8 @@ define signext i16 @vreduce_umax_nxv4i16( %v) { ; CHECK-LABEL: vreduce_umax_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -764,9 +720,8 @@ ; CHECK-LABEL: vreduce_smax_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -779,9 +734,8 @@ define signext i16 @vreduce_umin_nxv4i16( %v) { ; CHECK-LABEL: vreduce_umin_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -796,9 +750,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -807,9 +760,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 8 ; 
RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -822,9 +774,8 @@ define signext i16 @vreduce_and_nxv4i16( %v) { ; CHECK-LABEL: vreduce_and_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -837,9 +788,8 @@ define signext i16 @vreduce_or_nxv4i16( %v) { ; CHECK-LABEL: vreduce_or_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -852,9 +802,8 @@ define signext i16 @vreduce_xor_nxv4i16( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -867,9 +816,8 @@ define signext i32 @vreduce_add_nxv1i32( %v) { ; CHECK-LABEL: vreduce_add_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -880,9 +828,9 @@ define signext i32 @vwreduce_add_nxv1i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -895,9 +843,9 @@ define signext i32 @vwreduce_uadd_nxv1i16( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -912,9 +860,8 @@ define signext i32 @vreduce_umax_nxv1i32( %v) { ; CHECK-LABEL: vreduce_umax_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -928,9 +875,8 @@ ; CHECK-LABEL: vreduce_smax_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -943,9 +889,8 @@ define signext 
i32 @vreduce_umin_nxv1i32( %v) { ; CHECK-LABEL: vreduce_umin_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -960,9 +905,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -971,9 +915,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -986,9 +929,8 @@ define signext i32 @vreduce_and_nxv1i32( %v) { ; CHECK-LABEL: vreduce_and_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1001,9 +943,8 @@ define signext i32 @vreduce_or_nxv1i32( %v) { ; CHECK-LABEL: vreduce_or_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1016,9 +957,8 @@ define signext i32 @vreduce_xor_nxv1i32( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1031,9 +971,8 @@ define signext i32 @vreduce_add_nxv2i32( %v) { ; CHECK-LABEL: vreduce_add_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1044,9 +983,9 @@ define signext i32 @vwreduce_add_nxv2i16( %v) { ; CHECK-LABEL: vwreduce_add_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -1059,9 +998,9 @@ define signext i32 @vwreduce_uadd_nxv2i16( %v) { ; CHECK-LABEL: vwreduce_uadd_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -1076,9 
+1015,8 @@ define signext i32 @vreduce_umax_nxv2i32( %v) { ; CHECK-LABEL: vreduce_umax_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1092,9 +1030,8 @@ ; CHECK-LABEL: vreduce_smax_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1107,9 +1044,8 @@ define signext i32 @vreduce_umin_nxv2i32( %v) { ; CHECK-LABEL: vreduce_umin_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredminu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1124,9 +1060,8 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret @@ -1135,9 +1070,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1150,9 +1084,8 @@ define signext i32 @vreduce_and_nxv2i32( %v) { ; CHECK-LABEL: vreduce_and_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, -1 ; CHECK-NEXT: vredand.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1165,9 +1098,8 @@ define signext i32 @vreduce_or_nxv2i32( %v) { ; CHECK-LABEL: vreduce_or_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1180,9 +1112,8 @@ define signext i32 @vreduce_xor_nxv2i32( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -1359,9 +1290,8 @@ define i64 @vreduce_add_nxv1i64( %v) { ; RV32-LABEL: vreduce_add_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredsum.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1372,9 +1302,8 @@ ; ; RV64-LABEL: vreduce_add_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 ; 
RV64-NEXT: vredsum.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1385,9 +1314,9 @@ define i64 @vwreduce_add_nxv1i32( %v) { ; RV32-LABEL: vwreduce_add_nxv1i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1399,9 +1328,9 @@ ; ; RV64-LABEL: vwreduce_add_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1414,9 +1343,9 @@ define i64 @vwreduce_uadd_nxv1i32( %v) { ; RV32-LABEL: vwreduce_uadd_nxv1i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1428,9 +1357,9 @@ ; ; RV64-LABEL: vwreduce_uadd_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1445,9 +1374,8 @@ define i64 @vreduce_umax_nxv1i64( %v) { ; RV32-LABEL: vreduce_umax_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredmaxu.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1458,9 +1386,8 @@ ; ; RV64-LABEL: vreduce_umax_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredmaxu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1479,9 +1406,8 @@ ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: sw zero, 8(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmax.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1495,9 +1421,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmax.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1510,9 +1435,8 @@ define i64 @vreduce_umin_nxv1i64( %v) { ; RV32-LABEL: vreduce_umin_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 ; 
RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, -1 ; RV32-NEXT: vredminu.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1523,9 +1447,8 @@ ; ; RV64-LABEL: vreduce_umin_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, -1 ; RV64-NEXT: vredminu.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1546,9 +1469,8 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vredmin.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1562,9 +1484,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, a0 -; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 ; RV64-NEXT: vredmin.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1577,9 +1498,8 @@ define i64 @vreduce_and_nxv1i64( %v) { ; RV32-LABEL: vreduce_and_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, -1 ; RV32-NEXT: vredand.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1590,9 +1510,8 @@ ; ; RV64-LABEL: vreduce_and_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, -1 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, -1 ; RV64-NEXT: vredand.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1605,9 +1524,8 @@ define i64 @vreduce_or_nxv1i64( %v) { ; RV32-LABEL: vreduce_or_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1618,9 +1536,8 @@ ; ; RV64-LABEL: vreduce_or_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1633,9 +1550,8 @@ define i64 @vreduce_xor_nxv1i64( %v) { ; RV32-LABEL: vreduce_xor_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vredxor.vs v8, v8, v9 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 @@ -1646,9 +1562,8 @@ ; ; RV64-LABEL: vreduce_xor_nxv1i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vredxor.vs v8, v8, v9 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret
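The hunks above are auto-generated FileCheck lines, so the intent is easiest to read off the CHECK deltas themselves: in the LMUL <= 1 hunks the identity/start value is now splatted under the reduction's own vsetvli (vmv.v.i or vmv.v.x) instead of a separate "vsetivli zero, 1, e*, m1" plus vmv.s.x, while the m2/m4 hunks keep the m1 scalar move but switch the policy from tail-undisturbed ("tu") to tail-agnostic ("ta") and write the reduction result into v8 rather than into the splat register. The sketch below shows, in intact IR, the two reduction shapes these tests exercise; the function names and the llc invocation are illustrative assumptions, not part of the patch.

; Illustrative only -- assumed names, typical RVV run line:
;   llc -mtriple=riscv64 -mattr=+v reduce-example.ll -o -
declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
declare i64 @llvm.vp.reduce.add.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)

; Plain scalable reduction (vreductions-int.ll style).
define signext i8 @example_reduce_add(<vscale x 1 x i8> %v) {
  %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
  ret i8 %red
}

; Masked, EVL-predicated reduction, as in the vp tests above.
define i64 @example_vp_reduce_add(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
  %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret i64 %r
}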