// Patch summary (commentary only; everything from "Index:" onward is the unmodified patch text).
//
// Files touched:
//   * lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
//   * test/CodeGen/AArch64/vecreduce-fadd.ll
//
// SelectionDAGBuilder.cpp hunk:
//   * experimental_vector_reduce_fadd, FMF.isFast() branch: still builds
//     VECREDUCE_FADD over Op2, and now additionally wraps that result in a
//     scalar FADD with Op1 whenever Op1.isUndef() is false.
//   * experimental_vector_reduce_fmul, FMF.isFast() branch: same shape, using
//     VECREDUCE_FMUL over Op2 followed by a scalar FMUL with Op1.
//   * The non-fast branches are unchanged context: they keep emitting
//     VECREDUCE_STRICT_FADD / VECREDUCE_STRICT_FMUL with (Op1, Op2).
//
// vecreduce-fadd.ll hunk:
//   * Adds add_H_init42 (<8 x half>), add_S_init42 (<4 x float>) and
//     add_D_init42 (<2 x double>). Each calls the `fast` fadd reduction with a
//     42.0 first operand and checks for a `faddp`, then an `fadd`, then `ret`.
//
// NOTE(review): the line breaks of this patch appear to have been collapsed;
//   the hunk headers describe 15/19 and 6/33 lines, so the per-line structure
//   must be restored before the patch can be applied.
// NOTE(review): the `if` branches gain braces while the matching `else`
//   branches stay unbraced -- confirm this matches the project's brace style.
// NOTE(review): the new FADD/FMUL nodes are created without an explicit flags
//   argument in the visible hunk -- confirm whether fast-math flags should be
//   propagated to them.
// NOTE(review): only the fadd path gets new tests in the visible hunks; no
//   fmul-with-start-value test is visible here -- confirm coverage elsewhere.
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8626,15 +8626,19 @@ switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: - if (FMF.isFast()) + if (FMF.isFast()) { Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); - else + if (!Op1.isUndef()) + Res = DAG.getNode(ISD::FADD, dl, VT, Op1, Res); + } else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); break; case Intrinsic::experimental_vector_reduce_fmul: - if (FMF.isFast()) + if (FMF.isFast()) { Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); - else + if (!Op1.isUndef()) + Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, Res); + } else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); break; case Intrinsic::experimental_vector_reduce_add: Index: test/CodeGen/AArch64/vecreduce-fadd.ll =================================================================== --- test/CodeGen/AArch64/vecreduce-fadd.ll +++ test/CodeGen/AArch64/vecreduce-fadd.ll @@ -92,6 +92,33 @@ ret double %r } +define half @add_H_init42(<8 x half> %bin.rdx) { +; CHECK-LABEL: add_H_init42: +; CHECK: faddp h0, v0.2h +; CHECK: fadd h0 +; CHECK-NEXT: ret + %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v8f16(half 42.0, <8 x half> %bin.rdx) + ret half %r +} + +define float @add_S_init42(<4 x float> %bin.rdx) { +; CHECK-LABEL: add_S_init42: +; CHECK: faddp s0, v0.2s +; CHECK: fadd s0 +; CHECK-NEXT: ret + %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx) + ret float %r +} + +define double @add_D_init42(<2 x double> %bin.rdx) { +; CHECK-LABEL: add_D_init42: +; CHECK: faddp d0, v0.2d +; CHECK: fadd d0 +; CHECK-NEXT: ret + %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double 42.0, <2 x double> %bin.rdx) + ret double %r +} + ; Function Attrs: 
nounwind readnone declare half @llvm.experimental.vector.reduce.fadd.f16.v4f16(half, <4 x half>) declare half @llvm.experimental.vector.reduce.fadd.f16.v8f16(half, <8 x half>)