Index: llvm/include/llvm/Analysis/IVDescriptors.h =================================================================== --- llvm/include/llvm/Analysis/IVDescriptors.h +++ llvm/include/llvm/Analysis/IVDescriptors.h @@ -117,7 +117,7 @@ /// compare instruction to the select instruction and stores this pointer in /// 'PatternLastInst' member of the returned struct. static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind, - InstDesc &Prev, FastMathFlags FMF); + InstDesc &Prev, FastMathFlags FuncFMF); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -127,11 +127,12 @@ /// Returns true if all uses of the instruction I is within the Set. static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set); - /// Returns a struct describing if the instruction is a - /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) - /// or max(X, Y). \p Prev specifies the description of an already processed - /// select instruction, so its corresponding cmp can be matched to it. - static InstDesc isMinMaxSelectCmpPattern(Instruction *I, + /// Returns a struct describing if the instruction is a llvm.(s/u)(min/max), + /// llvm.minnum/maxnum or a Select(ICmp(X, Y), X, Y) pair of instructions + /// corresponding to a min(X, Y) or max(X, Y), matching the recurrence kind \p + /// Kind. \p Prev specifies the description of an already processed select + /// instruction, so its corresponding cmp can be matched to it. + static InstDesc isMinMaxSelectCmpPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev); /// Returns a struct describing if the instruction is a @@ -150,7 +151,7 @@ /// non-null, the minimal bit width needed to compute the reduction will be /// computed. 
static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, - FastMathFlags FMF, + FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, Index: llvm/lib/Analysis/IVDescriptors.cpp =================================================================== --- llvm/lib/Analysis/IVDescriptors.cpp +++ llvm/lib/Analysis/IVDescriptors.cpp @@ -423,7 +423,8 @@ ((!isa(UI) && !isa(UI) && !isa(UI)) || (!isConditionalRdxPattern(Kind, UI).isRecurrence() && - !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))) + !isMinMaxSelectCmpPattern(UI, Kind, IgnoredVal) + .isRecurrence()))) return false; // Remember that we completed the cycle. @@ -435,8 +436,10 @@ } // This means we have seen one but not the other instruction of the - // pattern or more than just a select and cmp. - if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2) + // pattern or more than just a select and cmp. Zero implies that we saw a + // llvm.min/max intrinsic, which is always OK. + if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 && + NumCmpSelectPatternInst != 0) return false; if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) @@ -506,10 +509,12 @@ } RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, +RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev) { - assert((isa(I) || isa(I)) && - "Expected a cmp or select instruction"); + assert((isa(I) || isa(I) || isa(I)) && + "Expected a cmp or select or call instruction"); + if (!isMinMaxRecurrenceKind(Kind)) + return InstDesc(false, I); // We must handle the select(cmp()) as a single instruction. Advance to the // select. @@ -519,28 +524,33 @@ return InstDesc(Select, Prev.getRecKind()); } - // Only match select with single use cmp condition. 
- if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), + // Only match select with single use cmp condition, or a min/max intrinsic. + if (!isa(I) && + !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), m_Value()))) return InstDesc(false, I); // Look for a min/max pattern. if (match(I, m_UMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::UMin); + return InstDesc(Kind == RecurKind::UMin, I); if (match(I, m_UMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::UMax); + return InstDesc(Kind == RecurKind::UMax, I); if (match(I, m_SMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::SMax); + return InstDesc(Kind == RecurKind::SMax, I); if (match(I, m_SMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::SMin); + return InstDesc(Kind == RecurKind::SMin, I); if (match(I, m_OrdFMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMin); + return InstDesc(Kind == RecurKind::FMin, I); if (match(I, m_OrdFMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMax); + return InstDesc(Kind == RecurKind::FMax, I); if (match(I, m_UnordFMin(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMin); + return InstDesc(Kind == RecurKind::FMin, I); if (match(I, m_UnordFMax(m_Value(), m_Value()))) - return InstDesc(I, RecurKind::FMax); + return InstDesc(Kind == RecurKind::FMax, I); + if (match(I, m_Intrinsic(m_Value(), m_Value()))) + return InstDesc(Kind == RecurKind::FMin, I); + if (match(I, m_Intrinsic(m_Value(), m_Value()))) + return InstDesc(Kind == RecurKind::FMax, I); return InstDesc(false, I); } @@ -593,7 +603,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind, - InstDesc &Prev, FastMathFlags FMF) { + InstDesc &Prev, FastMathFlags FuncFMF) { + assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind); switch (I->getOpcode()) { default: return InstDesc(false, I); @@ -624,9 +635,13 @@ LLVM_FALLTHROUGH; case 
Instruction::FCmp: case Instruction::ICmp: + case Instruction::Call: if (isIntMinMaxRecurrenceKind(Kind) || - (FMF.noNaNs() && FMF.noSignedZeros() && isFPMinMaxRecurrenceKind(Kind))) - return isMinMaxSelectCmpPattern(I, Prev); + (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) || + (isa(I) && I->hasNoNaNs() && + I->hasNoSignedZeros())) && + isFPMinMaxRecurrenceKind(Kind))) + return isMinMaxSelectCmpPattern(I, Kind, Prev); return InstDesc(false, I); } } Index: llvm/test/Transforms/LoopVectorize/minmax_reduction.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/minmax_reduction.ll +++ llvm/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -875,8 +875,183 @@ ret float %max.red.0 } +; CHECK-LABEL: @smin_intrinsic( +; CHECK: call <2 x i32> @llvm.smin.v2i32 +define i32 @smin_intrinsic(i32* nocapture readonly %x) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012 + %0 = load i32, i32* %arrayidx, align 4 + %1 = tail call i32 @llvm.smin.i32(i32 %s.011, i32 %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %1 +} + +; CHECK-LABEL: @smax_intrinsic( +; CHECK: call <2 x i32> @llvm.smax.v2i32 +define i32 @smax_intrinsic(i32* nocapture readonly %x) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012 + %0 = load i32, i32* %arrayidx, align 4 + %1 = tail call i32 @llvm.smax.i32(i32 %s.011, i32 %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label 
%for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %1 +} + +; CHECK-LABEL: @umin_intrinsic( +; CHECK: call <2 x i32> @llvm.umin.v2i32 +define i32 @umin_intrinsic(i32* nocapture readonly %x) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012 + %0 = load i32, i32* %arrayidx, align 4 + %1 = tail call i32 @llvm.umin.i32(i32 %s.011, i32 %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %1 +} + +; CHECK-LABEL: @umax_intrinsic( +; CHECK: call <2 x i32> @llvm.umax.v2i32 +define i32 @umax_intrinsic(i32* nocapture readonly %x) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi i32 [ 100, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.012 + %0 = load i32, i32* %arrayidx, align 4 + %1 = tail call i32 @llvm.umax.i32(i32 %s.011, i32 %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %1 +} + +; CHECK-LABEL: @fmin_intrinsic( +; CHECK: call fast <2 x float> @llvm.minnum.v2f32 +define float @fmin_intrinsic(float* nocapture readonly %x) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %1 + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds float, float* %x, i32 %i.012 + %0 = load float, float* %arrayidx, align 4 + %1 = tail call fast float @llvm.minnum.f32(float %s.011, float 
%0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: @fmax_intrinsic( +; CHECK: call fast <2 x float> @llvm.maxnum.v2f32 +define float @fmax_intrinsic(float* nocapture readonly %x) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret float %1 + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.011 = phi float [ 0.000000e+00, %entry ], [ %1, %for.body ] + %arrayidx = getelementptr inbounds float, float* %x, i32 %i.012 + %0 = load float, float* %arrayidx, align 4 + %1 = tail call fast float @llvm.maxnum.f32(float %s.011, float %0) + %inc = add nuw nsw i32 %i.012, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: @sminmax( +; Min and max intrinsics - don't vectorize +; CHECK-NOT: <2 x i32> +define i32 @sminmax(i32* nocapture readonly %x, i32* nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.024 = phi i32 [ 0, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.025 + %0 = load i32, i32* %arrayidx, align 4 + %s.0. 
= tail call i32 @llvm.smin.i32(i32 %s.024, i32 %0) + %arrayidx3 = getelementptr inbounds i32, i32* %y, i32 %i.025 + %1 = load i32, i32* %arrayidx3, align 4 + %cond9 = tail call i32 @llvm.smax.i32(i32 %s.0., i32 %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: @sminmin( +; CHECK: call <2 x i32> @llvm.smin.v2i32 +; CHECK: call <2 x i32> @llvm.smin.v2i32 +define i32 @sminmin(i32* nocapture readonly %x, i32* nocapture readonly %y) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %cond9 + +for.body: ; preds = %entry, %for.body + %i.025 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %s.024 = phi i32 [ 0, %entry ], [ %cond9, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.025 + %0 = load i32, i32* %arrayidx, align 4 + %s.0. = tail call i32 @llvm.smin.i32(i32 %s.024, i32 %0) + %arrayidx3 = getelementptr inbounds i32, i32* %y, i32 %i.025 + %1 = load i32, i32* %arrayidx3, align 4 + %cond9 = tail call i32 @llvm.smin.i32(i32 %s.0., i32 %1) + %inc = add nuw nsw i32 %i.025, 1 + %exitcond.not = icmp eq i32 %inc, 1024 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + ; Make sure any check-not directives are not triggered by function declarations. ; CHECK: declare +declare i32 @llvm.smin.i32(i32, i32) +declare i32 @llvm.smax.i32(i32, i32) +declare i32 @llvm.umin.i32(i32, i32) +declare i32 @llvm.umax.i32(i32, i32) +declare float @llvm.minnum.f32(float, float) +declare float @llvm.maxnum.f32(float, float) + attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } attributes #1 = { "no-nans-fp-math"="true" }