Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9636,6 +9636,7 @@
           VecOp, Plan->getVPValue(R->getOperand(1))};
       VPInstruction *FMulRecipe = new VPInstruction(Instruction::FMul, FMulOps);
+      FMulRecipe->setFastMathFlags(R->getFastMathFlags());
       WidenRecipe->getParent()->insert(FMulRecipe, WidenRecipe->getIterator());
       VecOp = FMulRecipe;
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -789,6 +789,7 @@
 private:
   typedef unsigned char OpcodeTy;
   OpcodeTy Opcode;
+  FastMathFlags FMF;
 
   /// Utility method serving execute(): generates a single instance of the
   /// modeled instruction.
@@ -870,6 +871,9 @@
       return true;
     }
   }
+
+  // Set the fast-math flags.
+  void setFastMathFlags(FastMathFlags FMFNew) { FMF = FMFNew; }
 };
 
 /// VPWidenRecipe is a recipe for producing a copy of vector type its
Index: llvm/lib/Transforms/Vectorize/VPlan.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -718,6 +718,8 @@
 
 void VPInstruction::execute(VPTransformState &State) {
   assert(!State.Instance && "VPInstruction executing an Instance");
+  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
+  State.Builder.setFastMathFlags(FMF);
   for (unsigned Part = 0; Part < State.UF; ++Part)
     generateInstruction(State, Part);
 }
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -483,10 +483,10 @@
 ; CHECK-ORDERED: [[WIDE_LOAD5:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-ORDERED: [[WIDE_LOAD6:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
 ; CHECK-ORDERED: [[WIDE_LOAD7:%.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
-; CHECK-ORDERED: [[FMUL:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
-; CHECK-ORDERED: [[FMUL1:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
-; CHECK-ORDERED: [[FMUL2:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
-; CHECK-ORDERED: [[FMUL3:%.*]] = fmul <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
+; CHECK-ORDERED: [[FMUL:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; CHECK-ORDERED: [[FMUL1:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]]
+; CHECK-ORDERED: [[FMUL2:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]]
+; CHECK-ORDERED: [[FMUL3:%.*]] = fmul nnan <vscale x 8 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
 ; CHECK-ORDERED: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], <vscale x 8 x float> [[FMUL]])
 ; CHECK-ORDERED: [[RDX1:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX]], <vscale x 8 x float> [[FMUL1]])
 ; CHECK-ORDERED: [[RDX2:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[RDX1]], <vscale x 8 x float> [[FMUL2]])
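Note on the VPlan.cpp change: IRBuilderBase::FastMathFlagGuard (from llvm/IR/IRBuilder.h) is an RAII helper that snapshots the builder's current fast-math flags on construction and restores them on destruction, so the flags applied while executing one VPInstruction cannot leak into IR generated for later recipes. A minimal standalone sketch of the pattern follows; the function name emitFMulWithNNaN and the operands LHS/RHS are hypothetical placeholders for illustration, not part of the patch:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch only: Builder, LHS and RHS are assumed to exist in the caller.
static Value *emitFMulWithNNaN(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
  // Saves the builder's current fast-math flags until scope exit.
  IRBuilderBase::FastMathFlagGuard Guard(Builder);
  FastMathFlags FMF;
  FMF.setNoNaNs();               // mirrors the reduction's 'nnan' flag
  Builder.setFastMathFlags(FMF); // applies to instructions created below
  return Builder.CreateFMul(LHS, RHS); // emitted as 'fmul nnan ...'
} // Guard's destructor restores the previous flags here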