diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10562,6 +10562,25 @@
     break;
   }
+  case RISCVISD::VFMV_S_F_VL: {
+    SDValue Src = N->getOperand(1);
+    // Try to remove vector->scalar->vector if the scalar->vector is inserting
+    // into an undef vector.
+    // TODO: Could use a vslide or vmv.v.v for non-undef.
+    if (N->getOperand(0).isUndef() &&
+        Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        isNullConstant(Src.getOperand(1)) &&
+        Src.getOperand(0).getValueType().isScalableVector()) {
+      EVT VT = N->getValueType(0);
+      EVT SrcVT = Src.getOperand(0).getValueType();
+      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
+      // Widths match, just return the original vector.
+      if (SrcVT == VT)
+        return Src.getOperand(0);
+      // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
+    }
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = N->getConstantOperandVal(0);
     switch (IntNo) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -133,17 +133,14 @@
 ; CHECK-NEXT:    vfmv.s.f v25, fa0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
 ; CHECK-NEXT:    vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    addi a1, a0, -32
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfredusum.vs v8, v16, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfredusum.vs v25, v16, v25, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
   ret float %r
@@ -164,17 +161,14 @@
 ; CHECK-NEXT:    vfmv.s.f v25, fa0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    addi a1, a0, -32
 ; CHECK-NEXT:    sltu a0, a0, a1
 ; CHECK-NEXT:    addi a0, a0, -1
 ; CHECK-NEXT:    and a0, a0, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfredosum.vs v8, v16, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfredosum.vs v25, v16, v25, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
   ret float %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -238,15 +238,13 @@
 define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v128f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 64
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    addi a1, a0, 128
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vle16.v v16, (a1)
 ; CHECK-NEXT:    vfmv.s.f v24, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v24
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    vfredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -670,15 +668,13 @@
 define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v64f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    addi a1, a0, 128
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    vle32.v v16, (a1)
 ; CHECK-NEXT:    vfmv.s.f v24, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v24
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    vfredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -715,20 +711,15 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v16, a0
+; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v24, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v16, v16, v24
-; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s ft0, v16
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v16, ft0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v24
+; CHECK-NEXT:    vfwredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1084,14 +1075,12 @@
 define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_v32f64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    vle64.v v16, (a1)
 ; CHECK-NEXT:    vfmv.s.f v24, fa0
 ; CHECK-NEXT:    vfredosum.vs v8, v8, v24
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    vfmv.s.f v8, ft0
 ; CHECK-NEXT:    vfredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -1126,18 +1115,14 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a0)
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v16, 16
+; CHECK-NEXT:    vslidedown.vi v16, v8, 16
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v24, fa0
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v16, v16, v24
-; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s ft0, v16
-; CHECK-NEXT:    vfmv.s.f v16, ft0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vfwredosum.vs v8, v8, v16
+; CHECK-NEXT:    vfwredosum.vs v8, v8, v24
+; CHECK-NEXT:    vfwredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -93,30 +93,28 @@
 define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv64f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a1, a2, 1
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 1
 ; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vx v24, v0, a1
-; CHECK-NEXT:    slli a2, a2, 2
-; CHECK-NEXT:    sub a1, a0, a2
-; CHECK-NEXT:    sltu a3, a0, a1
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a1, a3, a1
+; CHECK-NEXT:    vslidedown.vx v24, v0, a2
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    slli a1, a1, 2
 ; CHECK-NEXT:    vfmv.s.f v25, fa0
-; CHECK-NEXT:    bltu a0, a2, .LBB6_2
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    bltu a0, a1, .LBB6_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
 ; CHECK-NEXT:    vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, ft0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT:    sub a1, a0, a1
+; CHECK-NEXT:    sltu a0, a0, a1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfredusum.vs v8, v16, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfredusum.vs v25, v16, v25, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
@@ -125,30 +123,28 @@
 define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a1, a2, 1
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 1
 ; CHECK-NEXT:    vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vx v24, v0, a1
-; CHECK-NEXT:    slli a2, a2, 2
-; CHECK-NEXT:    sub a1, a0, a2
-; CHECK-NEXT:    sltu a3, a0, a1
-; CHECK-NEXT:    addi a3, a3, -1
-; CHECK-NEXT:    and a1, a3, a1
+; CHECK-NEXT:    vslidedown.vx v24, v0, a2
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    slli a1, a1, 2
 ; CHECK-NEXT:    vfmv.s.f v25, fa0
-; CHECK-NEXT:    bltu a0, a2, .LBB7_2
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    bltu a0, a1, .LBB7_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB7_2:
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
 ; CHECK-NEXT:    vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT:    vfmv.f.s ft0, v25
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, ft0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT:    sub a1, a0, a1
+; CHECK-NEXT:    sltu a0, a0, a1
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, ma
 ; CHECK-NEXT:    vmv1r.v v0, v24
-; CHECK-NEXT:    vfredosum.vs v8, v16, v8, v0.t
-; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    vfredosum.vs v25, v16, v25, v0.t
+; CHECK-NEXT:    vfmv.f.s fa0, v25
 ; CHECK-NEXT:    ret
   %r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
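A minimal sketch of the kind of input the new VFMV_S_F_VL combine targets, written as standalone LLVM IR. This snippet is not part of the patch; the function name, the nxv4f32 width, and the llc invocation are illustrative. Chaining two ordered reductions makes the scalar result of the first reduction the start value of the second, so lowering previously built a vfmv.f.s/vfmv.s.f round trip between the two vfredosum instructions; when the scalar comes from element 0 of a scalable vector of the same type as the one vfmv.s.f would produce, the combine should forward the first reduction's result register directly, as in the test updates above.

; Compile with, e.g.: llc -mtriple=riscv64 -mattr=+v
define float @ord_fadd_chain(<vscale x 4 x float> %a, <vscale x 4 x float> %b, float %s) {
  ; The first ordered reduction produces a scalar...
  %r0 = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %a)
  ; ...which is immediately moved back into a vector as the start value of the
  ; second reduction; that vector->scalar->vector hop is what the combine removes.
  %r1 = call float @llvm.vector.reduce.fadd.nxv4f32(float %r0, <vscale x 4 x float> %b)
  ret float %r1
}
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)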