Index: llvm/trunk/include/llvm/IR/PatternMatch.h =================================================================== --- llvm/trunk/include/llvm/IR/PatternMatch.h +++ llvm/trunk/include/llvm/IR/PatternMatch.h @@ -234,11 +234,35 @@ return false; } }; +// Either constexpr if or renaming ConstantFP::getValueAPF to +// ConstantFP::getValue is needed to do it via single template +// function for both apint/apfloat. +struct apfloat_match { + const APFloat *&Res; + apfloat_match(const APFloat *&R) : Res(R) {} + template <typename ITy> bool match(ITy *V) { + if (auto *CI = dyn_cast<ConstantFP>(V)) { + Res = &CI->getValueAPF(); + return true; + } + if (V->getType()->isVectorTy()) + if (const auto *C = dyn_cast<Constant>(V)) + if (auto *CI = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) { + Res = &CI->getValueAPF(); + return true; + } + return false; + } +}; /// \brief Match a ConstantInt or splatted ConstantVector, binding the /// specified pointer to the contained APInt. inline apint_match m_APInt(const APInt *&Res) { return Res; } +/// \brief Match a ConstantFP or splatted ConstantVector, binding the +/// specified pointer to the contained APFloat. +inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; } + template <int64_t Val> struct constantint_match { template <typename ITy> bool match(ITy *V) { if (const auto *CI = dyn_cast<ConstantInt>(V)) { Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -3994,6 +3994,62 @@ return false; } +/// Match clamp pattern for float types without care about NaNs or signed zeros. +/// Given non-min/max outer cmp/select from the clamp pattern this +/// function recognizes if it can be substituted by a "canonical" min/max +/// pattern. +static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal, + Value *&LHS, Value *&RHS) { + // Try to match + // X < C1 ?
C1 : Min(X, C2) --> Max(C1, Min(X, C2)) + // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2)) + // and return description of the outer Max/Min. + + // First, check if select has inverse order: + if (CmpRHS == FalseVal) { + std::swap(TrueVal, FalseVal); + Pred = CmpInst::getInversePredicate(Pred); + } + + // Assume success now. If there's no match, callers should not use these anyway. + LHS = TrueVal; + RHS = FalseVal; + + const APFloat *FC1; + if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite()) + return {SPF_UNKNOWN, SPNB_NA, false}; + + const APFloat *FC2; + switch (Pred) { + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULT: + case CmpInst::FCMP_ULE: + if (match(FalseVal, + m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)), + m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) && + FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan) + return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false}; + break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_UGE: + if (match(FalseVal, + m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)), + m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) && + FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan) + return {SPF_FMINNUM, SPNB_RETURNS_ANY, false}; + break; + default: + break; + } + + return {SPF_UNKNOWN, SPNB_NA, false}; +} + /// Match non-obvious integer minimum and maximum sequences. static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, @@ -4201,7 +4257,18 @@ } } - return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); + if (CmpInst::isIntPredicate(Pred)) + return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); + + // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar + // may return either -0.0 or 0.0, so fcmp/select pair has stricter + // semantics than minNum. Be conservative in such case. 
+ if (NaNBehavior != SPNB_RETURNS_ANY || + (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && + !isKnownNonZero(CmpRHS))) + return {SPF_UNKNOWN, SPNB_NA, false}; + + return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); } static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1389,9 +1389,17 @@ auto SPF = SPR.Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { - // Canonicalize so that type casts are outside select patterns. - if (LHS->getType()->getPrimitiveSizeInBits() != - SelType->getPrimitiveSizeInBits()) { + // Canonicalize so that + // - type casts are outside select patterns. + // - float clamp is transformed to min/max pattern + + bool IsCastNeeded = LHS->getType() != SelType; + Value *CmpLHS = cast<CmpInst>(CondVal)->getOperand(0); + Value *CmpRHS = cast<CmpInst>(CondVal)->getOperand(1); + if (IsCastNeeded || + (LHS->getType()->isFPOrFPVectorTy() && + ((CmpLHS != LHS && CmpLHS != RHS) || + (CmpRHS != LHS && CmpRHS != RHS)))) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered); Value *Cmp; @@ -1404,10 +1412,12 @@ Cmp = Builder.CreateFCmp(Pred, LHS, RHS); } - Value *NewSI = Builder.CreateCast( - CastOp, Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), - SelType); - return replaceInstUsesWith(SI, NewSI); + Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI); + if (!IsCastNeeded) + return replaceInstUsesWith(SI, NewSI); + + Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType); + return replaceInstUsesWith(SI, NewCast); } } Index: llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll +++ 
llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll @@ -7,9 +7,9 @@ ; CHECK-LABEL: @clamp_float_fast_ordered_strict_maxmin( ; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[X]], 1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast olt float %x, 255.0 %min = select i1 %cmp2, float %x, float 255.0 @@ -24,9 +24,9 @@ ; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_maxmin( ; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ole float [[X]], 1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast olt float %x, 255.0 %min = select i1 %cmp2, float %x, float 255.0 @@ -41,9 +41,9 @@ ; CHECK-LABEL: @clamp_float_fast_ordered_strict_minmax( ; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt float [[X]], 2.550000e+02 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02 +; 
CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast ogt float %x, 1.0 %max = select i1 %cmp2, float %x, float 1.0 @@ -58,9 +58,9 @@ ; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_minmax( ; CHECK-NEXT: [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast oge float [[X]], 2.550000e+02 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast ogt float %x, 1.0 %max = select i1 %cmp2, float %x, float 1.0 @@ -78,9 +78,9 @@ ; CHECK-LABEL: @clamp_float_fast_unordered_strict_maxmin( ; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ult float [[X]], 1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast ult float %x, 255.0 %min = select i1 %cmp2, float %x, float 255.0 @@ -95,9 +95,9 @@ ; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_maxmin( ; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02 ; CHECK-NEXT: [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ule float [[X]], 1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00 +; 
CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast ult float %x, 255.0 %min = select i1 %cmp2, float %x, float 255.0 @@ -112,9 +112,9 @@ ; CHECK-LABEL: @clamp_float_fast_unordered_strict_minmax( ; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ugt float [[X]], 2.550000e+02 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast ugt float %x, 1.0 %max = select i1 %cmp2, float %x, float 1.0 @@ -129,9 +129,9 @@ ; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_minmax( ; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00 ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]] -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast uge float [[X]], 2.550000e+02 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02 +; CHECK-NEXT: ret float [[R1]] ; %cmp2 = fcmp fast ugt float %x, 1.0 %max = select i1 %cmp2, float %x, float 1.0 @@ -148,9 +148,9 @@ ; CHECK-LABEL: @clamp_test_1( ; CHECK-NEXT: [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02 ; CHECK-NEXT: [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]] -; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ugt float [[X]], 1.000000e+00 -; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 1.000000e+00 -; CHECK-NEXT: 
ret float [[R]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp fast oge float [[INNER_SEL]], 1.000000e+00 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[DOTINV]], float [[INNER_SEL]], float 1.000000e+00 +; CHECK-NEXT: ret float [[R1]] ; %inner_cmp = fcmp fast ult float %x, 255.0 %inner_sel = select i1 %inner_cmp, float %x, float 255.0 @@ -503,11 +503,11 @@ ;; Check casts behavior define float @ui32_clamp_and_cast_to_float(i32 %x) { ; CHECK-LABEL: @ui32_clamp_and_cast_to_float( -; CHECK-NEXT: [[F_X:%.*]] = uitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[UP_CMP:%.*]] = icmp ugt i32 [[X]], 255 -; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[MIN:%.*]] = select i1 [[UP_CMP]], float 2.550000e+02, float [[F_X]] -; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]] +; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], 255 +; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 +; CHECK-NEXT: [[TMP2:%.*]] = uitofp i32 [[MIN1]] to float +; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[TMP2]] ; CHECK-NEXT: ret float [[R]] ; %f_x = uitofp i32 %x to float @@ -539,10 +539,10 @@ ; CHECK-LABEL: @mixed_clamp_to_float_1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 ; CHECK-NEXT: [[SI_MIN:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[F_MIN:%.*]] = sitofp i32 [[SI_MIN]] to float -; CHECK-NEXT: [[LO_CMP:%.*]] = icmp slt i32 [[X]], 1 -; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[F_MIN]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SI_MIN]], 1 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP2]], i32 [[SI_MIN]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: ret float [[TMP3]] ; %si_min_cmp = icmp sgt i32 %x, 255 %si_min = select i1 %si_min_cmp, i32 255, i32 %x @@ -576,10 +576,10 @@ ; CHECK-LABEL: @mixed_clamp_to_float_2( ; CHECK-NEXT: [[TMP1:%.*]] 
= icmp slt i32 [[X:%.*]], 255 ; CHECK-NEXT: [[SI_MIN:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 -; CHECK-NEXT: [[F_MIN:%.*]] = sitofp i32 [[SI_MIN]] to float -; CHECK-NEXT: [[LO_CMP:%.*]] = icmp slt i32 [[X]], 1 -; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[F_MIN]] -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SI_MIN]], 1 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP2]], i32 [[SI_MIN]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: ret float [[TMP3]] ; %si_min_cmp = icmp sgt i32 %x, 255 %si_min = select i1 %si_min_cmp, i32 255, i32 %x Index: llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll +++ llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll @@ -139,10 +139,10 @@ define float @t10(i32 %x) { ; CHECK-LABEL: @t10( -; CHECK-NEXT: [[F_X:%.*]] = sitofp i32 [[X:%.*]] to float -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 255 -; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], float [[F_X]], float 2.550000e+02 -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255 +; CHECK-NEXT: [[R1:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 +; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: ret float [[TMP2]] ; %f_x = sitofp i32 %x to float %cmp = icmp sgt i32 %x, 255 Index: llvm/trunk/test/Transforms/InstCombine/pr27236.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/pr27236.ll +++ llvm/trunk/test/Transforms/InstCombine/pr27236.ll @@ -6,9 +6,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[SCALE:%.*]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[SCALE]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 0 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP4]], float 
[[TMP3]], float 0.000000e+00 -; CHECK-NEXT: ret float [[SEL]] +; CHECK-NEXT: ret float [[TMP3]] ; %1 = icmp sgt i32 1, %scale %2 = select i1 %1, i32 1, i32 %scale