Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9749,6 +9749,14 @@
   Value *PrevInChain = State.get(getChainOp(), 0);
   RecurKind Kind = RdxDesc->getRecurrenceKind();
   bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
+  FastMathFlags CurrentFMF = State.Builder.getFastMathFlags();
+  if (IsOrdered) {
+    // Propagate the fast-math flags carried by the underlying instruction.
+    if (auto *FPMO = dyn_cast<FPMathOperator>(getUnderlyingInstr())) {
+      FastMathFlags FMF = FPMO->getFastMathFlags();
+      State.Builder.setFastMathFlags(FMF);
+    }
+  }
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     Value *NewVecOp = State.get(getVecOp(), Part);
     if (VPValue *Cond = getCondOp()) {
@@ -9788,6 +9796,8 @@
                                 PrevInChain);
     State.set(this, NextInChain, Part);
   }
+  // Restore the fast-math flags the builder held on entry.
+  State.Builder.setFastMathFlags(CurrentFMF);
 }
 
 void VPReplicateRecipe::execute(VPTransformState &State) {
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -812,6 +812,52 @@
   ret double %res
 }
 
+; Test case where the fadd has a fast-math flag.
+define float @fadd_strict_fmf(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_strict_fmf
+; CHECK-ORDERED: vector.body:
+; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[RDX:%.*]], %vector.body ]
+; CHECK-ORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-ORDERED: [[RDX]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float [[VEC_PHI]], <8 x float> [[LOAD_VEC]])
+; CHECK-ORDERED: for.end:
+; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-ORDERED: ret float [[RES]]
+
+; CHECK-UNORDERED-LABEL: @fadd_strict_fmf
+; CHECK-UNORDERED: vector.body:
+; CHECK-UNORDERED: [[VEC_PHI:%.*]] = phi <8 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ [[FADD_VEC:%.*]], %vector.body ]
+; CHECK-UNORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>*
+; CHECK-UNORDERED: [[FADD_VEC]] = fadd nnan <8 x float> [[LOAD_VEC]], [[VEC_PHI]]
+; CHECK-UNORDERED-NOT: call nnan float @llvm.vector.reduce.fadd
+; CHECK-UNORDERED: middle.block:
+; CHECK-UNORDERED: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[FADD_VEC]])
+; CHECK-UNORDERED: for.body:
+; CHECK-UNORDERED: [[LOAD:%.*]] = load float, float*
+; CHECK-UNORDERED: [[FADD:%.*]] = fadd nnan float [[LOAD]], {{.*}}
+; CHECK-UNORDERED: for.end:
+; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD]], %for.body ], [ [[RDX]], %middle.block ]
+; CHECK-UNORDERED: ret float [[RES]]
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_strict_fmf
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd nnan float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret float %add
+}
+
 !0 = distinct !{!0, !5, !9, !11}
 !1 = distinct !{!1, !5, !10, !11}
 !2 = distinct !{!2, !6, !9, !11}
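
Note on the pattern in the first hunk: the explicit save/propagate/restore of the builder's fast-math flags can also be written with LLVM's IRBuilderBase::FastMathFlagGuard, an RAII helper that restores the saved flags automatically on scope exit. Below is a minimal standalone sketch of that alternative; the helper name emitReductionWithFMF and its parameters are hypothetical illustrations, not part of this patch.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Operator.h"

    using namespace llvm;

    // Hypothetical helper showing the same propagate-then-restore pattern
    // as the hunk above, using the RAII guard instead of explicit calls.
    static Value *emitReductionWithFMF(IRBuilder<> &Builder, Instruction *Src,
                                       Value *Acc, Value *Vec) {
      // Saves the builder's current fast-math flags now; the destructor
      // restores them when Guard goes out of scope.
      IRBuilderBase::FastMathFlagGuard Guard(Builder);
      if (auto *FPMO = dyn_cast<FPMathOperator>(Src))
        Builder.setFastMathFlags(FPMO->getFastMathFlags());
      // FP instructions and calls created through the builder pick up its
      // fast-math flags, which is how 'nnan' reaches the
      // llvm.vector.reduce.fadd call in the CHECK-ORDERED lines above.
      return Builder.CreateFAddReduce(Acc, Vec);
    }

The guard form would also keep the flags restored across early returns; on the single exit path of VPReductionRecipe::execute the explicit form in the patch behaves the same.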