diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6122,8 +6122,20 @@
   assert(NeutralElem && "Neutral element must exist");
 
   // Pad the vector with the neutral element.
-  unsigned OrigElts = OrigVT.getVectorNumElements();
-  unsigned WideElts = WideVT.getVectorNumElements();
+  unsigned OrigElts = OrigVT.getVectorMinNumElements();
+  unsigned WideElts = WideVT.getVectorMinNumElements();
+
+  if (WideVT.isScalableVector()) {
+    unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+    EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+                                   ElementCount::getScalable(GCD));
+    SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+    for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+      Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+                       DAG.getVectorIdxConstant(Idx, dl));
+    return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+  }
+
   for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
     Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
                      DAG.getVectorIdxConstant(Idx, dl));
diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
--- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
@@ -372,6 +372,85 @@
   ret i64 %res
 }
 
+; Test widening of vector reduce types
+declare i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8>)
+
+define i8 @smin_nxv10i8(<vscale x 10 x i8> %a) {
+; CHECK-LABEL: smin_nxv10i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z2.h, z0.b
+; CHECK-NEXT:    mov z1.d, #127 // =0x7f
+; CHECK-NEXT:    uunpklo z3.s, z2.h
+; CHECK-NEXT:    uunpkhi z2.s, z2.h
+; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z1.s
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uzp1 z2.h, z3.h, z2.h
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z2.b
+; CHECK-NEXT:    uunpkhi z2.h, z2.b
+; CHECK-NEXT:    uunpkhi z3.s, z2.h
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uunpkhi z3.d, z3.s
+; CHECK-NEXT:    uzp1 z3.s, z1.s, z3.s
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z3.h
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z2.b
+; CHECK-NEXT:    uunpkhi z2.h, z2.b
+; CHECK-NEXT:    uunpkhi z3.s, z2.h
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    uzp1 z1.s, z3.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    sminv b0, p0, z0.b
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+  %res = call i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8> %a)
+  ret i8 %res
+}
+
+declare i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8>)
+
+define i8 @uaddv_nxv12i8(<vscale x 12 x i8> %a) {
+; CHECK-LABEL: uaddv_nxv12i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z2.h, z0.b
+; CHECK-NEXT:    mov z1.s, #0 // =0x0
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    uaddv d0, p0, z0.b
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %res = call i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8> %a)
+  ret i8 %res
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv14i8(<vscale x 14 x i8>)
+
+define i8 @umax_nxv14i8(<vscale x 14 x i8> %a) {
+; CHECK-LABEL: umax_nxv14i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z2.h, z0.b
+; CHECK-NEXT:    mov z1.d, #0 // =0x0
+; CHECK-NEXT:    uunpkhi z3.s, z2.h
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uzp1 z1.s, z3.s, z1.s
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    umaxv b0, p0, z0.b
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+  %res = call i8 @llvm.vector.reduce.umax.nxv14i8(<vscale x 14 x i8> %a)
+  ret i8 %res
+}
+
 declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
 declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
 declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -1146,3 +1146,97 @@
   %red = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
   ret half %red
 }
+
+; Test widening of vector reduce types (fadd/fmin/fmax)
+define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_fadd_nxv3f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a1
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v9, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfredusum.vs v8, v8, v9
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_fadd_nxv6f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    fmv.h.x ft0, zero
+; CHECK-NEXT:    fneg.h ft0, ft0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v10, ft0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v9, v10, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v10, fa0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vfredusum.vs v8, v8, v10
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half>)
+
+define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv10f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    lui a1, %hi(.LCPI73_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI73_0)(a1)
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v12, ft0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v10, v12, a0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v11, v12, 0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v11, v12, a0
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v12, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vfredmin.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half>)
+
+define half @vreduce_fmax_nxv12f16(<vscale x 12 x half> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv12f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI74_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI74_0)(a0)
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v12, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v11, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vfredmax.vs v8, v8, v12
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half> %v)
+  ret half %red
+}
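
Note on the LegalizeVectorTypes.cpp hunk: the tail of a scalable widened vector holds vscale x (WideElts - OrigElts) elements, so it cannot be padded one INSERT_VECTOR_ELT at a time; the new path instead inserts scalable splats of the neutral element. Sizing the splat to gcd(OrigElts, WideElts) keeps every insertion index a multiple of the subvector's minimum element count, which ISD::INSERT_SUBVECTOR requires. Below is a minimal standalone sketch of that index arithmetic (plain C++17, not LLVM code; the values and output format are illustrative only):

#include <cstdio>
#include <numeric>

int main() {
  // Example: widening nxv10i8 to nxv16i8 (known minimum element counts).
  unsigned OrigElts = 10, WideElts = 16;
  // gcd(10, 16) == 2, so the neutral splat has type nxv2i8 and every
  // insertion index below is a multiple of 2, as INSERT_SUBVECTOR requires.
  unsigned GCD = std::gcd(OrigElts, WideElts);
  for (unsigned Idx = OrigElts; Idx < WideElts; Idx += GCD)
    std::printf("INSERT_SUBVECTOR of a %u-element neutral splat at index %u\n",
                GCD, Idx);
  return 0;
}

The sketch reports insertions at indices 10, 12, and 14, three subvector insertions, consistent with the three rounds of unpack/uzp1 padding visible in the smin_nxv10i8 output above.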