diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -136,7 +136,8 @@ static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp); + static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp, + FastMathFlags FMF); /// Returns the opcode corresponding to the RecurrenceKind. static unsigned getOpcode(RecurKind Kind); diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -761,7 +761,8 @@ /// This function returns the identity element (or neutral element) for /// the operation K. -Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) { +Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp, + FastMathFlags FMF) { switch (K) { case RecurKind::Xor: case RecurKind::Add: @@ -779,7 +780,14 @@ return ConstantFP::get(Tp, 1.0L); case RecurKind::FAdd: // Adding zero to a number does not change it. - return ConstantFP::get(Tp, 0.0L); + // FIXME: Ideally we should not need to check FMF for FAdd and should always + // use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0. + // Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI + // nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would + // mean we can then remove the check for noSignedZeros() below (see D98963). + if (FMF.noSignedZeros()) + return ConstantFP::get(Tp, 0.0L); + return ConstantFP::get(Tp, -0.0L); case RecurKind::UMin: return ConstantInt::get(Tp, -1); case RecurKind::UMax: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4697,7 +4697,7 @@ } } else { Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity( - RK, VecTy->getScalarType()); + RK, VecTy->getScalarType(), RdxDesc->getFastMathFlags()); Iden = IdenC; if (!ScalarPHI) { @@ -9207,7 +9207,7 @@ Value *NewCond = State.get(Cond, Part); VectorType *VecTy = cast(NewVecOp->getType()); Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity( - Kind, VecTy->getElementType()); + Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags()); Constant *IdenVec = ConstantVector::getSplat(VecTy->getElementCount(), Iden); Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec); diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -129,8 +129,8 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[ARRAY:%.*]], i32 [[TMP0]] @@ -153,7 +153,7 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -168,7 +168,7 @@ ; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -177,7 +177,7 @@ loop: %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ] - %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ] + %sum = phi float [ -0.000000e+00, %entry ], [ %sum.inc, %loop ] %address = getelementptr float, float* %array, i32 %idx %value = load float, float* %address %sum.inc = fadd reassoc float %sum, %value @@ -186,7 +186,7 @@ br i1 %be.cond, label %loop, label %loop.exit loop.exit: - %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ] + %sum.lcssa = phi float [ %sum.inc, %loop ], [ -0.000000e+00, %entry ] ret float %sum.lcssa } @@ -201,8 +201,8 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[ARRAY:%.*]], i32 [[TMP0]] @@ -225,7 +225,7 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ -0.000000e+00, [[LOOP_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -240,7 +240,7 @@ ; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -249,7 +249,7 @@ loop: %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ] - %sum = phi float [ 0.000000e+00, %entry ], [ %sum.inc, %loop ] + %sum = phi float [ -0.000000e+00, %entry ], [ %sum.inc, %loop ] %address = getelementptr float, float* %array, i32 %idx %value = load float, float* %address %sum.inc = fadd reassoc contract float %sum, %value @@ -258,7 +258,7 @@ br i1 %be.cond, label %loop, label %loop.exit loop.exit: - %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.000000e+00, %entry ] + %sum.lcssa = phi float [ %sum.inc, %loop ], [ -0.000000e+00, %entry ] ret float %sum.lcssa }