Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1563,40 +1563,30 @@ } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); // If we can shrink the call to a float function rather than a double // function, do that first. + Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name)) if (Value *Ret = optimizeBinaryDoubleFP(CI, B)) return Ret; + // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to + // the intrinsics for improved optimization (for example, vectorization). + // No-signed-zeros is implied by the definitions of fmax/fmin themselves. + // From the C standard draft WG14/N1256: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(-0.0, +0.0) would return +0; however, implementation in software + // might be impractical." IRBuilder<>::FastMathFlagGuard Guard(B); - FastMathFlags FMF; - if (CI->isFast()) { - // If the call is 'fast', then anything we create here will also be 'fast'. - FMF.setFast(); - } else { - // At a minimum, no-nans-fp-math must be true. - if (!CI->hasNoNaNs()) - return nullptr; - // No-signed-zeros is implied by the definitions of fmax/fmin themselves: - // "Ideally, fmax would be sensitive to the sign of zero, for example - // fmax(-0. 0, +0. 0) would return +0; however, implementation in software - // might be impractical." - FMF.setNoSignedZeros(); - FMF.setNoNaNs(); - } + FastMathFlags FMF = CI->getFastMathFlags(); + FMF.setNoSignedZeros(); B.setFastMathFlags(FMF); - // We have a relaxed floating-point environment. We can ignore NaN-handling - // and transform to a compare and select. 
We do not have to consider errno or - // exceptions, because fmin/fmax do not have those. - Value *Op0 = CI->getArgOperand(0); - Value *Op1 = CI->getArgOperand(1); - Value *Cmp = Callee->getName().startswith("fmin") ? - B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); - return B.CreateSelect(Cmp, Op0, Op1); + Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum + : Intrinsic::maxnum; + Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType()); + return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) }); } Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { Index: llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll +++ llvm/trunk/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -513,7 +513,7 @@ ; flags are propagated for shrunken *binary* double FP calls. define float @max1(float %a, float %b) { ; CHECK-LABEL: @max1( -; ISC99-NEXT: [[FMAXF:%.*]] = call arcp float @fmaxf(float [[A:%.*]], float [[B:%.*]]) +; ISC99-NEXT: [[FMAXF:%.*]] = call nsz arcp float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]]) ; ISC99-NEXT: ret float [[FMAXF]] ; ISC89: [[FMAXF:%.*]] = call arcp double @fmax(double [[A:%.*]], double [[B:%.*]]) ; @@ -524,14 +524,15 @@ ret float %f } -; A function can have a name that matches a common libcall, -; but with the wrong type(s). Let it be. +; This is treated as libm 'fmin' - LLVM types do not necessarily +; correspond to 'C' types, so this is not required to be "fminl". 
define float @fake_fmin(float %a, float %b) { ; CHECK-LABEL: @fake_fmin( ; CHECK-NEXT: [[C:%.*]] = fpext float [[A:%.*]] to fp128 ; CHECK-NEXT: [[D:%.*]] = fpext float [[B:%.*]] to fp128 -; CHECK-NEXT: [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]]) +; ISC99-NEXT: [[E:%.*]] = call nsz fp128 @llvm.minnum.f128(fp128 [[C]], fp128 [[D]]) +; ISC89-NEXT: [[E:%.*]] = call fp128 @fmin(fp128 [[C]], fp128 [[D]]) ; CHECK-NEXT: [[F:%.*]] = fptrunc fp128 [[E]] to float ; CHECK-NEXT: ret float [[F]] ; @@ -542,7 +543,7 @@ ret float %f } -declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for. +declare fp128 @fmin(fp128, fp128) declare double @fmax(double, double) Index: llvm/trunk/test/Transforms/InstCombine/fast-math.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/fast-math.ll +++ llvm/trunk/test/Transforms/InstCombine/fast-math.ll @@ -811,17 +811,13 @@ declare fp128 @fmaxl(fp128, fp128) declare fp128 @fminl(fp128, fp128) -; No NaNs is the minimum requirement to replace these calls. -; This should always be set when unsafe-fp-math is true, but -; alternate the attributes for additional test coverage. ; 'nsz' is implied by the definition of fmax or fmin itself. -; Shrink and remove the call. +; Shrink and replace the call. 
define float @max1(float %a, float %b) { ; CHECK-LABEL: @max1( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %c = fpext float %a to double %d = fpext float %b to double @@ -832,8 +828,8 @@ define float @fmax_no_fmf(float %a, float %b) { ; CHECK-LABEL: @fmax_no_fmf( -; CHECK-NEXT: [[C:%.*]] = call float @fmaxf(float [[A:%.*]], float [[B:%.*]]) -; CHECK-NEXT: ret float [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %c = call float @fmaxf(float %a, float %b) ret float %c @@ -841,9 +837,8 @@ define float @max2(float %a, float %b) { ; CHECK-LABEL: @max2( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz float @llvm.maxnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %c = call nnan float @fmaxf(float %a, float %b) ret float %c @@ -852,9 +847,8 @@ define double @max3(double %a, double %b) { ; CHECK-LABEL: @max3( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], double [[A]], double [[B]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.maxnum.f64(double [[A:%.*]], double [[B:%.*]]) +; CHECK-NEXT: ret double [[TMP1]] ; %c = call fast double @fmax(double %a, double %b) ret double %c @@ -862,9 +856,8 @@ define fp128 @max4(fp128 %a, fp128 %b) { ; CHECK-LABEL: @max4( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 
[[TMP1]], fp128 [[A]], fp128 [[B]] -; CHECK-NEXT: ret fp128 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz fp128 @llvm.maxnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]]) +; CHECK-NEXT: ret fp128 [[TMP1]] ; %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b) ret fp128 %c @@ -873,9 +866,8 @@ -; Shrink and remove the call. +; Shrink and replace the call. define float @min1(float %a, float %b) { ; CHECK-LABEL: @min1( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], float [[A]], float [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %c = fpext float %a to double %d = fpext float %b to double @@ -886,8 +878,8 @@ define float @fmin_no_fmf(float %a, float %b) { ; CHECK-LABEL: @fmin_no_fmf( -; CHECK-NEXT: [[C:%.*]] = call float @fminf(float [[A:%.*]], float [[B:%.*]]) -; CHECK-NEXT: ret float [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %c = call float @fminf(float %a, float %b) ret float %c @@ -895,9 +887,8 @@ define float @min2(float %a, float %b) { ; CHECK-LABEL: @min2( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], float [[A]], float [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.minnum.f32(float [[A:%.*]], float [[B:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %c = call fast float @fminf(float %a, float %b) ret float %c @@ -905,9 +896,8 @@ define double @min3(double %a, double %b) { ; CHECK-LABEL: @min3( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select nnan nsz i1 [[TMP1]], double [[A]], double [[B]] -; CHECK-NEXT: ret double [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call nnan nsz double @llvm.minnum.f64(double [[A:%.*]], 
double [[B:%.*]]) +; CHECK-NEXT: ret double [[TMP1]] ; %c = call nnan double @fmin(double %a, double %b) ret double %c @@ -915,9 +905,8 @@ define fp128 @min4(fp128 %a, fp128 %b) { ; CHECK-LABEL: @min4( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select fast i1 [[TMP1]], fp128 [[A]], fp128 [[B]] -; CHECK-NEXT: ret fp128 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast fp128 @llvm.minnum.f128(fp128 [[A:%.*]], fp128 [[B:%.*]]) +; CHECK-NEXT: ret fp128 [[TMP1]] ; %c = call fast fp128 @fminl(fp128 %a, fp128 %b) ret fp128 %c Index: llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll +++ llvm/trunk/test/Transforms/InstCombine/float-shrink-compare.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -356,9 +357,9 @@ define i1 @test15(float %x, float %y, float %z) { ; CHECK-LABEL: @test15( -; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] ; %1 = fpext float %x to double %2 = fpext float %y to double @@ -370,9 +371,9 @@ define i1 @test16(float %x, float %y, float %z) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float %x, float %y) #0 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %z -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = 
call nsz float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] ; %1 = fpext float %z to double %2 = fpext float %x to double @@ -384,9 +385,9 @@ define i1 @test17(float %x, float %y, float %z) { ; CHECK-LABEL: @test17( -; CHECK-NEXT: [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] ; %1 = fpext float %x to double %2 = fpext float %y to double @@ -398,9 +399,9 @@ define i1 @test18(float %x, float %y, float %z) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[FMAXF:%.*]] = call float @fmaxf(float %x, float %y) #0 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMAXF]], %z -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.maxnum.f32(float [[X:%.*]], float [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Z:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] ; %1 = fpext float %z to double %2 = fpext float %x to double @@ -426,9 +427,9 @@ define i1 @test20(float %x, float %y) { ; CHECK-LABEL: @test20( -; CHECK-NEXT: [[FMINF:%.*]] = call float @fminf(float 1.000000e+00, float %x) #0 -; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq float [[FMINF]], %y -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.minnum.f32(float [[X:%.*]], float 1.000000e+00) +; CHECK-NEXT: [[TMP2:%.*]] = fcmp oeq float [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[TMP2]] ; %1 = fpext float %y to double %2 = fpext float %x to double @@ -441,9 +442,9 @@ define i1 @test21(float %x, float %y) { ; CHECK-LABEL: @test21( -; CHECK-NEXT: [[TMP1:%.*]] = fpext float %y to double -; CHECK-NEXT: [[TMP2:%.*]] = fpext float %x to double -; CHECK-NEXT: [[TMP3:%.*]] = call double 
@fmin(double 1.300000e+00, double [[TMP2]]) #2 +; CHECK-NEXT: [[TMP1:%.*]] = fpext float [[Y:%.*]] to double +; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[X:%.*]] to double +; CHECK-NEXT: [[TMP3:%.*]] = call nsz double @llvm.minnum.f64(double [[TMP2]], double 1.300000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq double [[TMP3]], [[TMP1]] ; CHECK-NEXT: ret i1 [[TMP4]] ;