diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -706,6 +706,10 @@ return InstDesc(Kind == RecurKind::FMin, I); if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) return InstDesc(Kind == RecurKind::FMax, I); + if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value()))) + return InstDesc(Kind == RecurKind::FMin, I); + if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()))) + return InstDesc(Kind == RecurKind::FMax, I); return InstDesc(false, I); } @@ -801,11 +805,20 @@ case Instruction::Call: if (isSelectCmpRecurrenceKind(Kind)) return isSelectCmpPattern(L, OrigPhi, I, Prev); + auto HasRequiredFMF = [&]() { + if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) + return true; + if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros()) + return true; + // minimum and maximum intrinsics do not require nsz and nnan flags since + // NaN and signed zeroes are both handled by the semantics of the + // intrinsics themselves. 
+ return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) || + match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())); + }; + if (isIntMinMaxRecurrenceKind(Kind) || - (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) || - (isa<FPMathOperator>(I) && I->hasNoNaNs() && - I->hasNoSignedZeros())) && - isFPMinMaxRecurrenceKind(Kind))) + (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind))) return isMinMaxPattern(I, Kind, Prev); else if (isFMulAddIntrinsic(I)) return InstDesc(Kind == RecurKind::FMulAdd, I, diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll --- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -1090,6 +1090,194 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } +; CHECK-LABEL: fminnum_fminnum +; CHECK: call nnan nsz <2 x float> @llvm.minnum.v2f32 +; CHECK: call nnan nsz <2 x float> @llvm.minnum.v2f32 +; CHECK: call nnan nsz float @llvm.vector.reduce.fmin.v2f32 +define float @fminnum_fminnum(ptr nocapture readonly %x, ptr nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025 + %0 = load float, ptr %arrayidx, align 4 + %s.0. 
= tail call nnan nsz float @llvm.minnum.f32(float %s.011, float %0) + %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025 + %1 = load float, ptr %arrayidx3, align 4 + %cond9 = tail call nnan nsz float @llvm.minnum.f32(float %s.0., float %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: fmaximum_intrinsic +; CHECK: call <2 x float> @llvm.maximum.v2f32 +; CHECK: call float @llvm.vector.reduce.fmax.v2f32 +define float @fmaximum_intrinsic(ptr nocapture readonly %x) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %1 + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012 + %0 = load float, ptr %arrayidx, align 4 + %1 = tail call float @llvm.maximum.f32(float %s.011, float %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: fminimum_intrinsic +; CHECK: call <2 x float> @llvm.minimum.v2f32 +; CHECK: call float @llvm.vector.reduce.fmin.v2f32 +define float @fminimum_intrinsic(ptr nocapture readonly %x) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %1 + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.012 + %0 = load float, ptr %arrayidx, align 4 + %1 = tail call float @llvm.minimum.f32(float %s.011, float %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: fminimum_fminimum +; CHECK: call <2 x float> @llvm.minimum.v2f32 +; 
CHECK: call <2 x float> @llvm.minimum.v2f32 +define float @fminimum_fminimum(ptr nocapture readonly %x, ptr nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025 + %0 = load float, ptr %arrayidx, align 4 + %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0) + %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025 + %1 = load float, ptr %arrayidx3, align 4 + %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: fminimum_fminimum_one_with_flags +; CHECK: call nnan nsz <2 x float> @llvm.minimum.v2f32 +; CHECK: call <2 x float> @llvm.minimum.v2f32 +; CHECK: call float @llvm.vector.reduce.fmin.v2f32 +define float @fminimum_fminimum_one_with_flags(ptr nocapture readonly %x, ptr nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025 + %0 = load float, ptr %arrayidx, align 4 + %s.0. = tail call nnan nsz float @llvm.minimum.f32(float %s.011, float %0) + %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025 + %1 = load float, ptr %arrayidx3, align 4 + %cond9 = tail call float @llvm.minimum.f32(float %s.0., float %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; don't vectorize. minnum missing the nsz nnan flags. 
+; CHECK-LABEL: fminimum_fminnum +; CHECK-NOT: <2 x float> +define float @fminimum_fminnum(ptr nocapture readonly %x, ptr nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025 + %0 = load float, ptr %arrayidx, align 4 + %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0) + %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025 + %1 = load float, ptr %arrayidx3, align 4 + %cond9 = tail call float @llvm.minnum.f32(float %s.0., float %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; missing flags in chain. do not vectorize. +; CHECK-LABEL: fminnum_fminnum_noflags +; CHECK-NOT: <2 x float> +define float @fminnum_fminnum_noflags(ptr nocapture readonly %x, ptr nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025 + %0 = load float, ptr %arrayidx, align 4 + %s.0. 
= tail call nnan nsz float @llvm.minnum.f32(float %s.011, float %0) + %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025 + %1 = load float, ptr %arrayidx3, align 4 + %cond9 = tail call nsz float @llvm.minnum.f32(float %s.0., float %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; mixing minimum and maximum in one chain. do not vectorize. +; CHECK-LABEL: fminimum_fmaximum +; CHECK-NOT: <2 x float> +define float @fminimum_fmaximum(ptr nocapture readonly %x, ptr nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.025 + %0 = load float, ptr %arrayidx, align 4 + %s.0. = tail call float @llvm.minimum.f32(float %s.011, float %0) + %arrayidx3 = getelementptr inbounds float, ptr %y, i32 %i.025 + %1 = load float, ptr %arrayidx3, align 4 + %cond9 = tail call float @llvm.maximum.f32(float %s.0., float %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + ; Make sure any check-not directives are not triggered by function declarations. ; CHECK: declare @@ -1099,6 +1287,8 @@ declare i32 @llvm.umax.i32(i32, i32) declare float @llvm.minnum.f32(float, float) declare float @llvm.maxnum.f32(float, float) +declare float @llvm.minimum.f32(float, float) +declare float @llvm.maximum.f32(float, float) attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } attributes #1 = { "no-nans-fp-math"="true" }