Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -344,9 +344,6 @@ unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } - // Returns true if the NoNaN attribute is set on the function. - bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } - /// Returns all assume calls in predicated blocks. They need to be dropped /// when flattening the CFG. const SmallPtrSetImpl &getConditionalAssumes() const { @@ -495,9 +492,6 @@ /// outside the loop. SmallPtrSet AllowedExit; - /// Can we assume the absence of NaNs. - bool HasFunNoNaNAttr = false; - /// Vectorization requirements that will go through late-evaluation. LoopVectorizationRequirements *Requirements; Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -605,11 +605,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); - // Look for the attribute signaling the absence of NaNs. - Function &F = *Header->getParent(); - HasFunNoNaNAttr = - F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. @@ -673,7 +668,7 @@ InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { addInductionPhi(Phi, ID, AllowedExit); - if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) + if (ID.hasUnsafeAlgebra()) Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); continue; } Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8907,7 +8907,7 @@ ? RecipeBuilder.createBlockInMask(R->getParent(), Plan) : nullptr; VPReductionRecipe *RedRecipe = new VPReductionRecipe( - &RdxDesc, R, ChainOp, VecOp, CondOp, Legal->hasFunNoNaNAttr(), TTI); + &RdxDesc, R, ChainOp, VecOp, CondOp, TTI); WidenRecipe->getVPValue()->replaceAllUsesWith(RedRecipe); Plan->removeVPValueFor(R); Plan->addVPValue(R, RedRecipe); Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -1130,17 +1130,15 @@ class VPReductionRecipe : public VPRecipeBase, public VPUser, public VPValue { /// The recurrence decriptor for the reduction in question. RecurrenceDescriptor *RdxDesc; - /// Fast math flags to use for the resulting reduction operation. - bool NoNaN; /// Pointer to the TTI, needed to create the target reduction const TargetTransformInfo *TTI; public: VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, - VPValue *VecOp, VPValue *CondOp, bool NoNaN, + VPValue *VecOp, VPValue *CondOp, const TargetTransformInfo *TTI) : VPRecipeBase(VPRecipeBase::VPReductionSC), VPUser({ChainOp, VecOp}), - VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), NoNaN(NoNaN), + VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) { if (CondOp) addOperand(CondOp); Index: llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ; This test checks auto-vectorization with FP induction variable. -; The FP operation is not "fast" and requires "fast-math" function attribute. +; FMF is required on the IR instructions. ;void fp_iv_loop1(float * __restrict__ A, int N) { ; float x = 1.0; @@ -149,7 +149,7 @@ ; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 -; AUTO_VEC-NEXT: [[CONV1]] = fadd float [[X_06]], 5.000000e-01 +; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AUTO_VEC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]] ; AUTO_VEC-NEXT: br i1 [[TMP45]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]] @@ -168,7 +168,7 @@ %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ] %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv store float %x.06, float* %arrayidx, align 4 - %conv1 = fadd float %x.06, 5.000000e-01 + %conv1 = fadd fast float %x.06, 5.000000e-01 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %N @@ -181,7 +181,7 @@ ret void } -; The same as the previous, FP operation is not fast, different function attribute +; The same as the previous, but FP operation has no FMF. ; Vectorization should be rejected. ;void fp_iv_loop2(float * __restrict__ A, int N) { ; float x = 1.0; @@ -191,7 +191,7 @@ ; } ;} -define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 { +define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) { ; AUTO_VEC-LABEL: @fp_iv_loop2( ; AUTO_VEC-NEXT: entry: ; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -538,6 +538,3 @@ %t1 = phi double [ %j, %for.body ] ret double %t1 } - -attributes #0 = { "no-nans-fp-math"="true" } -attributes #1 = { "no-nans-fp-math"="false" } Index: llvm/unittests/Transforms/Vectorize/VPlanTest.cpp =================================================================== --- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -643,7 +643,7 @@ VPValue ChainOp; VPValue VecOp; VPValue CondOp; - VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp, false, + VPReductionRecipe Recipe(nullptr, nullptr, &ChainOp, &CondOp, &VecOp, nullptr); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe;