Skip to content

Commit 57fd1dc

Browse files
committedAug 16, 2015
transform fmin/fmax calls when possible (PR24314)
If we can ignore NaNs, fmin/fmax libcalls can become compare and select (this is what we turn std::min / std::max into). This IR should then be optimized in the backend to whatever is best for any given target. E.g., x86 can use minss/maxss instructions.

This should solve PR24314:
https://llvm.org/bugs/show_bug.cgi?id=24314

Differential Revision: http://reviews.llvm.org/D11866

llvm-svn: 245187
1 parent 3278b7c commit 57fd1dc

File tree

3 files changed

+169
-2
lines changed

3 files changed

+169
-2
lines changed
 

‎llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h

+1
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class LibCallSimplifier {
131131
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
132132
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
133133
Value *optimizeFabs(CallInst *CI, IRBuilder<> &B);
134+
Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
134135
Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B);
135136
Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
136137

‎llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

+61-2
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,60 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
11841184
return Ret;
11851185
}
11861186

1187+
/// Simplify a call to an fmin/fmax-family library function.
///
/// A call to the double-precision "fmin"/"fmax" is first offered to
/// optimizeBinaryDoubleFP (only when TLI reports that the matching float
/// variant exists); if that produces a value it is returned as-is.
/// Otherwise, when the enclosing function carries "unsafe-fp-math"="true" or
/// "no-nans-fp-math"="true", the call is replaced by an ordered fcmp plus a
/// select over the two call arguments.
///
/// \param CI the call being simplified.
/// \param B  builder used to create the replacement instructions.
/// \returns the replacement value, or nullptr when the callee's signature is
/// not (FP, FP) -> same FP type or when neither attribute is "true".
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
1188+
// If we can shrink the call to a float function rather than a double
1189+
// function, do that first.
1190+
Function *Callee = CI->getCalledFunction();
1191+
if ((Callee->getName() == "fmin" && TLI->has(LibFunc::fminf)) ||
1192+
(Callee->getName() == "fmax" && TLI->has(LibFunc::fmaxf))) {
1193+
Value *Ret = optimizeBinaryDoubleFP(CI, B);
1194+
if (Ret)
1195+
return Ret;
1196+
}
1197+
1198+
// Make sure this has 2 arguments of FP type which match the result type.
1199+
FunctionType *FT = Callee->getFunctionType();
1200+
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
1201+
FT->getParamType(0) != FT->getParamType(1) ||
1202+
!FT->getParamType(0)->isFloatingPointTy())
1203+
return nullptr;
1204+
1205+
// FIXME: For finer-grain optimization, we need intrinsics to have the same
1206+
// fast-math flag decorations that are applied to FP instructions. For now,
1207+
// we have to rely on the function-level attributes to do this optimization
1208+
// because there's no other way to express that the calls can be relaxed.
1209+
// NOTE(review): Guard is expected to restore B's previous fast-math flags
// when this function returns, so the flags set below do not leak into later
// uses of the shared builder -- confirm against the IRBuilder documentation.
IRBuilder<true, ConstantFolder,
1210+
IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B);
1211+
FastMathFlags FMF;
1212+
// The attributes are read from the function that contains the call, not
// from the callee.
Function *F = CI->getParent()->getParent();
1213+
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
1214+
if (Attr.getValueAsString() == "true") {
1215+
// Unsafe algebra sets all fast-math-flags to true.
1216+
FMF.setUnsafeAlgebra();
1217+
} else {
1218+
// At a minimum, no-nans-fp-math must be true.
1219+
Attr = F->getFnAttribute("no-nans-fp-math");
1220+
if (Attr.getValueAsString() != "true")
1221+
return nullptr;
1222+
// No-signed-zeros is implied by the definitions of fmax/fmin themselves:
1223+
// "Ideally, fmax would be sensitive to the sign of zero, for example
1224+
// fmax(-0.0, +0.0) would return +0; however, implementation in software
1225+
// might be impractical."
1226+
FMF.setNoSignedZeros();
1227+
FMF.setNoNaNs();
1228+
}
1229+
B.SetFastMathFlags(FMF);
1230+
1231+
// We have a relaxed floating-point environment. We can ignore NaN-handling
1232+
// and transform to a compare and select. We do not have to consider errno or
1233+
// exceptions, because fmin/fmax do not have those.
1234+
Value *Op0 = CI->getArgOperand(0);
1235+
Value *Op1 = CI->getArgOperand(1);
1236+
// Names starting with "fmin" (this prefix test also matches fminf/fminl)
// become select(Op0 < Op1, Op0, Op1); any other callee reaching this point
// is treated as an fmax variant and becomes select(Op0 > Op1, Op0, Op1).
Value *Cmp = Callee->getName().startswith("fmin") ?
1237+
B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
1238+
return B.CreateSelect(Cmp, Op0, Op1);
1239+
}
1240+
11871241
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
11881242
Function *Callee = CI->getCalledFunction();
11891243

@@ -2110,11 +2164,16 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
21102164
return optimizeUnaryDoubleFP(CI, Builder, true);
21112165
return nullptr;
21122166
case LibFunc::copysign:
2113-
case LibFunc::fmin:
2114-
case LibFunc::fmax:
21152167
if (hasFloatVersion(FuncName))
21162168
return optimizeBinaryDoubleFP(CI, Builder);
21172169
return nullptr;
2170+
case LibFunc::fminf:
2171+
case LibFunc::fmin:
2172+
case LibFunc::fminl:
2173+
case LibFunc::fmaxf:
2174+
case LibFunc::fmax:
2175+
case LibFunc::fmaxl:
2176+
return optimizeFMinFMax(CI, Builder);
21182177
default:
21192178
return nullptr;
21202179
}

‎llvm/test/Transforms/InstCombine/fast-math.ll

+107
Original file line numberDiff line numberDiff line change
@@ -716,3 +716,110 @@ define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
716716
; CHECK-NEXT: ret fp128 %fabs
717717
}
718718

719+
; =========================================================================
720+
;
721+
; Test-cases for fmin / fmax
722+
;
723+
; =========================================================================
724+
725+
declare double @fmax(double, double)
726+
declare double @fmin(double, double)
727+
declare float @fmaxf(float, float)
728+
declare float @fminf(float, float)
729+
declare fp128 @fmaxl(fp128, fp128)
730+
declare fp128 @fminl(fp128, fp128)
731+
732+
; No NaNs is the minimum requirement to replace these calls.
733+
; This should always be set when unsafe-fp-math is true, but
734+
; alternate the attributes for additional test coverage.
735+
; 'nsz' is implied by the definition of fmax or fmin itself.
736+
attributes #1 = { "no-nans-fp-math" = "true" }
737+
738+
; Shrink and remove the call.
739+
; fmax called on two fpext'ed floats with the result truncated back to float.
; The CHECK lines expect a float-typed 'fcmp fast ogt' + select directly on
; %a/%b, i.e. neither the call nor the conversions remain.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; sets "unsafe-fp-math"="true" (the CHECK lines expect 'fast') -- confirm.
define float @max1(float %a, float %b) #0 {
740+
%c = fpext float %a to double
741+
%d = fpext float %b to double
742+
%e = call double @fmax(double %c, double %d)
743+
%f = fptrunc double %e to float
744+
ret float %f
745+
746+
; CHECK-LABEL: max1(
747+
; CHECK-NEXT: fcmp fast ogt float %a, %b
748+
; CHECK-NEXT: select {{.*}} float %a, float %b
749+
; CHECK-NEXT: ret
750+
}
751+
752+
; Direct fmaxf call under attribute group #1 ("no-nans-fp-math"="true").
; The CHECK lines expect 'fcmp nnan nsz ogt' on float followed by a select.
define float @max2(float %a, float %b) #1 {
753+
%c = call float @fmaxf(float %a, float %b)
754+
ret float %c
755+
756+
; CHECK-LABEL: max2(
757+
; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
758+
; CHECK-NEXT: select {{.*}} float %a, float %b
759+
; CHECK-NEXT: ret
760+
}
761+
762+
763+
; Direct fmax call on doubles (no shrinking opportunity).
; The CHECK lines expect 'fcmp fast ogt' on double followed by a select.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; sets "unsafe-fp-math"="true" (the CHECK lines expect 'fast') -- confirm.
define double @max3(double %a, double %b) #0 {
764+
%c = call double @fmax(double %a, double %b)
765+
ret double %c
766+
767+
; CHECK-LABEL: max3(
768+
; CHECK-NEXT: fcmp fast ogt double %a, %b
769+
; CHECK-NEXT: select {{.*}} double %a, double %b
770+
; CHECK-NEXT: ret
771+
}
772+
773+
; Direct fmaxl call on fp128 under attribute group #1
; ("no-nans-fp-math"="true").
; The CHECK lines expect 'fcmp nnan nsz ogt' on fp128 followed by a select.
define fp128 @max4(fp128 %a, fp128 %b) #1 {
774+
%c = call fp128 @fmaxl(fp128 %a, fp128 %b)
775+
ret fp128 %c
776+
777+
; CHECK-LABEL: max4(
778+
; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
779+
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
780+
; CHECK-NEXT: ret
781+
}
782+
783+
; Shrink and remove the call.
784+
; fmin called on two fpext'ed floats with the result truncated back to float,
; under attribute group #1 ("no-nans-fp-math"="true").
; The CHECK lines expect a float-typed 'fcmp nnan nsz olt' + select directly
; on %a/%b, i.e. neither the call nor the conversions remain.
define float @min1(float %a, float %b) #1 {
785+
%c = fpext float %a to double
786+
%d = fpext float %b to double
787+
%e = call double @fmin(double %c, double %d)
788+
%f = fptrunc double %e to float
789+
ret float %f
790+
791+
; CHECK-LABEL: min1(
792+
; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
793+
; CHECK-NEXT: select {{.*}} float %a, float %b
794+
; CHECK-NEXT: ret
795+
}
796+
797+
; Direct fminf call on floats.
; The CHECK lines expect 'fcmp fast olt' on float followed by a select.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; sets "unsafe-fp-math"="true" (the CHECK lines expect 'fast') -- confirm.
define float @min2(float %a, float %b) #0 {
798+
%c = call float @fminf(float %a, float %b)
799+
ret float %c
800+
801+
; CHECK-LABEL: min2(
802+
; CHECK-NEXT: fcmp fast olt float %a, %b
803+
; CHECK-NEXT: select {{.*}} float %a, float %b
804+
; CHECK-NEXT: ret
805+
}
806+
807+
; Direct fmin call on doubles (no shrinking opportunity) under attribute
; group #1 ("no-nans-fp-math"="true").
; The CHECK lines expect 'fcmp nnan nsz olt' on double followed by a select.
define double @min3(double %a, double %b) #1 {
808+
%c = call double @fmin(double %a, double %b)
809+
ret double %c
810+
811+
; CHECK-LABEL: min3(
812+
; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
813+
; CHECK-NEXT: select {{.*}} double %a, double %b
814+
; CHECK-NEXT: ret
815+
}
816+
817+
; Direct fminl call on fp128.
; The CHECK lines expect 'fcmp fast olt' on fp128 followed by a select.
; NOTE(review): attribute group #0 is defined outside this view; presumably it
; sets "unsafe-fp-math"="true" (the CHECK lines expect 'fast') -- confirm.
define fp128 @min4(fp128 %a, fp128 %b) #0 {
818+
%c = call fp128 @fminl(fp128 %a, fp128 %b)
819+
ret fp128 %c
820+
821+
; CHECK-LABEL: min4(
822+
; CHECK-NEXT: fcmp fast olt fp128 %a, %b
823+
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
824+
; CHECK-NEXT: ret
825+
}

0 commit comments

Comments
 (0)
Please sign in to comment.