Index: llvm/include/llvm/Analysis/IVDescriptors.h =================================================================== --- llvm/include/llvm/Analysis/IVDescriptors.h +++ llvm/include/llvm/Analysis/IVDescriptors.h @@ -47,6 +47,8 @@ FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). FMax, ///< FP max implemented in terms of select(cmp()). + FMinimum, ///< FP min with llvm.minimum semantics + FMaximum, ///< FP max with llvm.maximum semantics FMulAdd, ///< Fused multiply-add of floats (a * b + c). SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop ///< invariant @@ -223,7 +225,8 @@ /// Returns true if the recurrence kind is a floating-point min/max kind. static bool isFPMinMaxRecurrenceKind(RecurKind Kind) { - return Kind == RecurKind::FMin || Kind == RecurKind::FMax; + return Kind == RecurKind::FMin || Kind == RecurKind::FMax || + Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum; } /// Returns true if the recurrence kind is any min/max kind. 
Index: llvm/lib/CodeGen/ExpandReductions.cpp =================================================================== --- llvm/lib/CodeGen/ExpandReductions.cpp +++ llvm/lib/CodeGen/ExpandReductions.cpp @@ -49,6 +49,8 @@ return Instruction::ICmp; case Intrinsic::vector_reduce_fmax: case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_fmaximum: + case Intrinsic::vector_reduce_fminimum: return Instruction::FCmp; default: llvm_unreachable("Unexpected ID"); @@ -69,6 +71,10 @@ return RecurKind::FMax; case Intrinsic::vector_reduce_fmin: return RecurKind::FMin; + case Intrinsic::vector_reduce_fmaximum: + return RecurKind::FMaximum; + case Intrinsic::vector_reduce_fminimum: + return RecurKind::FMinimum; default: return RecurKind::None; } @@ -94,6 +100,8 @@ case Intrinsic::vector_reduce_umin: case Intrinsic::vector_reduce_fmax: case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_fminimum: + case Intrinsic::vector_reduce_fmaximum: if (TTI->shouldExpandReduction(II)) Worklist.push_back(II); @@ -191,6 +199,16 @@ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); break; } + case Intrinsic::vector_reduce_fmaximum: + case Intrinsic::vector_reduce_fminimum: { + Value *Vec = II->getArgOperand(0); + if (!isPowerOf2_32( + cast<FixedVectorType>(Vec->getType())->getNumElements())) + continue; + + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); + break; + } } II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Index: llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUtils.cpp +++ llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" @@ -909,6 +910,10 @@ return Intrinsic::minnum; case RecurKind::FMax: return Intrinsic::maxnum; + case
RecurKind::FMinimum: + return Intrinsic::minimum; + case RecurKind::FMaximum: + return Intrinsic::maximum; } } @@ -943,7 +948,8 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right) { Type *Ty = Left->getType(); - if (Ty->isIntOrIntVectorTy()) { + if (Ty->isIntOrIntVectorTy() || + (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) { // TODO: Add float minnum/maxnum support when FMF nnan is set. Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK); return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr, Index: llvm/test/CodeGen/Generic/expand-experimental-reductions.ll =================================================================== --- llvm/test/CodeGen/Generic/expand-experimental-reductions.ll +++ llvm/test/CodeGen/Generic/expand-experimental-reductions.ll @@ -18,6 +18,10 @@ declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>) +declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>) +declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>) + declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>) define i64 @add_i64(<2 x i64> %vec) { @@ -299,6 +303,62 @@ ret double %r } +define double @fminimum_f64(<2 x double> %vec) { +; CHECK-LABEL: @fminimum_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison> +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX]], i32 0 +; CHECK-NEXT: ret double [[TMP0]] +; +entry: + %r = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %vec) + ret double %r +} + +define double @fmaximum_f64(<2 x double> %vec) { +; CHECK-LABEL: @fmaximum_f64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x
double> [[VEC:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison> +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX]], i32 0 +; CHECK-NEXT: ret double [[TMP0]] +; +entry: + %r = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %vec) + ret double %r +} + +define float @fmaximum_f32(<4 x float> %vec) { +; CHECK-LABEL: @fmaximum_f32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison> +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x float> @llvm.maximum.v4f32(<4 x float> [[VEC]], <4 x float> [[RDX_SHUF]]) +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[RDX_MINMAX2:%.*]] = call <4 x float> @llvm.maximum.v4f32(<4 x float> [[RDX_MINMAX]], <4 x float> [[RDX_SHUF1]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[RDX_MINMAX2]], i32 0 +; CHECK-NEXT: ret float [[TMP0]] +; +entry: + %r = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec) + ret float %r +} + +define float @fmaximum_f32_nnan(<4 x float> %vec) { +; CHECK-LABEL: @fmaximum_f32_nnan( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison> +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> [[VEC]], <4 x float> [[RDX_SHUF]]) +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[RDX_MINMAX2:%.*]] = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> [[RDX_MINMAX]], <4 x float> [[RDX_SHUF1]]) +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[RDX_MINMAX2]], i32 0 +; CHECK-NEXT: ret float [[TMP0]] +; +entry: + %r = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec) + ret float %r +} + ; FIXME: Why is this not
expanded? ; Test when the vector size is not power of two.