diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@
   FMul,       ///< Product of floats.
   FMin,       ///< FP min implemented in terms of select(cmp()).
   FMax,       ///< FP max implemented in terms of select(cmp()).
+  FMinimum,   ///< FP min with llvm.minimum semantics
+  FMaximum,   ///< FP max with llvm.maximum semantics
   FMulAdd,    ///< Fused multiply-add of floats (a * b + c).
   SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
               ///< invariant
@@ -223,7 +225,8 @@

   /// Returns true if the recurrence kind is a floating-point min/max kind.
   static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
-    return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
+    return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+           Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
   }

   /// Returns true if the recurrence kind is any min/max kind.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -706,6 +706,10 @@
     return InstDesc(Kind == RecurKind::FMin, I);
   if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
     return InstDesc(Kind == RecurKind::FMax, I);
+  if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
+    return InstDesc(Kind == RecurKind::FMinimum, I);
+  if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
+    return InstDesc(Kind == RecurKind::FMaximum, I);
   return InstDesc(false, I);
 }

@@ -801,11 +805,18 @@
   case Instruction::Call:
     if (isSelectCmpRecurrenceKind(Kind))
       return isSelectCmpPattern(L, OrigPhi, I, Prev);
+    auto HasRequiredFMF = [&]() {
+      if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
+        return true;
+      if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
+        return true;
+      // minimum and maximum intrinsics do not require nsz and nnan flags since
+      // NaN and signed zeroes are propagated in the intrinsic implementation.
+      return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
+             match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
+    };
     if (isIntMinMaxRecurrenceKind(Kind) ||
-        (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
-          (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
-           I->hasNoSignedZeros())) &&
-         isFPMinMaxRecurrenceKind(Kind)))
+        (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
       return isMinMaxPattern(I, Kind, Prev);
     else if (isFMulAddIntrinsic(I))
       return InstDesc(Kind == RecurKind::FMulAdd, I,
@@ -923,6 +934,16 @@
     LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
     return true;
   }
+  if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
+    LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
+    return true;
+  }
+  if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
+                      SE)) {
+    LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
+    return true;
+  }
   // Not a reduction of known type.
   return false;
 }
@@ -1063,6 +1084,10 @@
     assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
            "nnan, nsz is expected to be set for FP max reduction.");
     return ConstantFP::getInfinity(Tp, true /*Negative*/);
+  case RecurKind::FMinimum:
+    return ConstantFP::getInfinity(Tp, false /*Negative*/);
+  case RecurKind::FMaximum:
+    return ConstantFP::getInfinity(Tp, true /*Negative*/);
   case RecurKind::SelectICmp:
   case RecurKind::SelectFCmp:
     return getRecurrenceStartValue();
@@ -1097,6 +1122,8 @@
     return Instruction::ICmp;
   case RecurKind::FMax:
   case RecurKind::FMin:
+  case RecurKind::FMaximum:
+  case RecurKind::FMinimum:
   case RecurKind::SelectFCmp:
     return Instruction::FCmp;
   default:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -909,6 +909,10 @@
     return Intrinsic::minnum;
   case RecurKind::FMax:
     return Intrinsic::maxnum;
+  case RecurKind::FMinimum:
+    return Intrinsic::minimum;
+  case RecurKind::FMaximum:
+    return Intrinsic::maximum;
   }
 }

@@ -928,6 +932,9 @@
     return CmpInst::FCMP_OLT;
   case RecurKind::FMax:
     return CmpInst::FCMP_OGT;
+  // We do not add FMinimum/FMaximum recurrence kind here since there is no
+  // equivalent predicate which compares signed zeroes according to the
+  // semantics of the intrinsics (llvm.minimum/maximum).
   }
 }

@@ -943,7 +950,8 @@
 Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                             Value *Right) {
   Type *Ty = Left->getType();
-  if (Ty->isIntOrIntVectorTy()) {
+  if (Ty->isIntOrIntVectorTy() ||
+      (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
     // TODO: Add float minnum/maxnum support when FMF nnan is set.
     Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
     return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
@@ -1094,6 +1102,10 @@
     return Builder.CreateFPMaxReduce(Src);
   case RecurKind::FMin:
     return Builder.CreateFPMinReduce(Src);
+  case RecurKind::FMinimum:
+    return Builder.CreateFPMinimumReduce(Src);
+  case RecurKind::FMaximum:
+    return Builder.CreateFPMaximumReduce(Src);
   default:
     llvm_unreachable("Unhandled opcode");
   }
diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
+; RUN: opt -S -passes=loop-vectorize,dce -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s

 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

@@ -1090,6 +1090,120 @@
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }

+; CHECK-LABEL: fmaximum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.maximum.v2f32
+; CHECK: call float @llvm.vector.reduce.fmaximum.v2f32
+define float @fmaximum_intrinsic(ptr nocapture readonly %x) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %1
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call float @llvm.maximum.f32(float %s.011, float %0)
+  %inc = add nuw nsw i32 %i.012, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_intrinsic
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_intrinsic(ptr nocapture readonly %x) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %1
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = tail call float @llvm.minimum.f32(float %s.011, float %0)
+  %inc = add nuw nsw i32 %i.012, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum
+; CHECK-LABEL: vector.body:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %cond9
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+  %0 = load float, ptr %arrayidx, align 4
+  %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0)
+  %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+  %1 = load float, ptr %arrayidx3, align 4
+  %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+  %inc = add nuw nsw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: fminimum_fminimum_one_with_flags
+; CHECK-LABEL: vector.body:
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+
+; CHECK-LABEL: middle.block:
+; CHECK: call <2 x float> @llvm.minimum.v2f32
+; CHECK: call float @llvm.vector.reduce.fminimum.v2f32
+define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %cond9
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025
+  %0 = load float, ptr %arrayidx, align 4
+  %s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0)
+  %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025
+  %1 = load float, ptr %arrayidx3, align 4
+  %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1)
+  %inc = add nuw nsw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
 ; Make sure any check-not directives are not triggered by function declarations.
 ; CHECK: declare

@@ -1099,6 +1213,8 @@
 declare i32 @llvm.umax.i32(i32, i32)
 declare float @llvm.minnum.f32(float, float)
 declare float @llvm.maxnum.f32(float, float)
+declare float @llvm.minimum.f32(float, float)
+declare float @llvm.maximum.f32(float, float)

 attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
 attributes #1 = { "no-nans-fp-math"="true" }