Index: include/llvm/Transforms/Utils/SimplifyLibCalls.h =================================================================== --- include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -128,7 +128,6 @@ Value *optimizeCos(CallInst *CI, IRBuilder<> &B); Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); - Value *optimizeFabs(CallInst *CI, IRBuilder<> &B); Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B); Value *optimizeLog(CallInst *CI, IRBuilder<> &B); Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B); Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1635,11 +1635,20 @@ return SelectInst::Create(Cond, Call0, Call1); } + LLVM_FALLTHROUGH; + } + case Intrinsic::ceil: + case Intrinsic::floor: + case Intrinsic::rint: + case Intrinsic::round: + case Intrinsic::nearbyint: + case Intrinsic::trunc: { Value *ExtSrc; + if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) && II->getArgOperand(0)->hasOneUse()) { // fabs (fpext x) -> fpext (fabs x) - Value *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::fabs, + Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), { ExtSrc->getType() }); Value *NewFabs = Builder->CreateCall(F, ExtSrc); NewFabs->takeName(II); Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1393,7 +1393,14 @@ if (II) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::fabs: { + case Intrinsic::fabs: + case Intrinsic::ceil: + case Intrinsic::floor: + case Intrinsic::rint: + case Intrinsic::round: + case Intrinsic::nearbyint: + case Intrinsic::trunc: { + 
// Do unary FP operation on smaller type. // (fptrunc (fabs x)) -> (fabs (fptrunc x)) Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), CI.getType()); Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -948,6 +948,20 @@ return B.CreateFPExt(V, B.getDoubleTy()); } +/// Replace the libcall \p CI with a call to the intrinsic \p IID, overloaded on the type of \p CI; only the first argument of \p CI is forwarded, and the new call takes over the name of \p CI. +static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) { + // Propagate fast-math flags from the existing call to the new call. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Module *M = CI->getModule(); + Value *V = CI->getArgOperand(0); + Function *F = Intrinsic::getDeclaration(M, IID, CI->getType()); + CallInst *NewCall = B.CreateCall(F, V); + NewCall->takeName(CI); + return NewCall; +} + /// Shrink double -> float for binary functions like 'fmin/fmax'. 
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); @@ -1207,19 +1221,6 @@ return Ret; } -Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - // fabs/fabsf -> llvm.fabs.* - Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::fabs, - CI->getType()); - Value *NewCall = B.CreateCall(F, { CI->getArgOperand(0) }); - NewCall->takeName(CI); - return NewCall; -} - Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); // If we can shrink the call to a float function rather than a double @@ -2088,7 +2089,7 @@ case LibFunc::fabsf: case LibFunc::fabs: case LibFunc::fabsl: - return optimizeFabs(CI, Builder); + return replaceUnaryCall(CI, Builder, Intrinsic::fabs); case LibFunc::sqrtf: case LibFunc::sqrt: case LibFunc::sqrtl: @@ -2141,14 +2142,17 @@ case LibFunc::fputc: return optimizeErrorReporting(CI, Builder, 1); case LibFunc::ceil: + return replaceUnaryCall(CI, Builder, Intrinsic::ceil); case LibFunc::floor: + return replaceUnaryCall(CI, Builder, Intrinsic::floor); case LibFunc::rint: + return replaceUnaryCall(CI, Builder, Intrinsic::rint); case LibFunc::round: + return replaceUnaryCall(CI, Builder, Intrinsic::round); case LibFunc::nearbyint: + return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); case LibFunc::trunc: - if (hasFloatVersion(FuncName)) - return optimizeUnaryDoubleFP(CI, Builder, false); - return nullptr; + return replaceUnaryCall(CI, Builder, Intrinsic::trunc); case LibFunc::acos: case LibFunc::acosh: case LibFunc::asin: @@ -2212,16 +2216,10 @@ // * log(exp10(y)) -> y*log(10) // * log(sqrt(x)) -> 0.5*log(x) // -// lround, lroundf, lroundl: -// * lround(cnst) -> cnst' -// // pow, powf, powl: // * pow(sqrt(x),y) -> pow(x,y*0.5) // * pow(pow(x,y),z)-> pow(x,y*z) 
// -// round, roundf, roundl: -// * round(cnst) -> cnst' -// // signbit: // * signbit(cnst) -> cnst' // * signbit(nncst) -> 0 (if pstv is a non-negative constant) @@ -2231,10 +2229,6 @@ // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// trunc, truncf, truncl: -// * trunc(cnst) -> cnst' -// -// //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations Index: test/Transforms/InstCombine/double-float-shrink-2.ll =================================================================== --- test/Transforms/InstCombine/double-float-shrink-2.ll +++ test/Transforms/InstCombine/double-float-shrink-2.ll @@ -5,21 +5,21 @@ ; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s ; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s -; DO-SIMPLIFY: call float @floorf( -; DO-SIMPLIFY: call float @ceilf( -; DO-SIMPLIFY: call float @roundf( -; DO-SIMPLIFY: call float @nearbyintf( -; DO-SIMPLIFY: call float @truncf( +; DO-SIMPLIFY: call float @llvm.floor.f32( +; DO-SIMPLIFY: call float @llvm.ceil.f32( +; DO-SIMPLIFY: call float @llvm.round.f32( +; DO-SIMPLIFY: call float @llvm.nearbyint.f32( +; DO-SIMPLIFY: call float @llvm.trunc.f32( ; DO-SIMPLIFY: call float @llvm.fabs.f32( -; DO-SIMPLIFY: call float fast @llvm.fabs.f32( +; DO-SIMPLIFY: call fast float @llvm.fabs.f32( -; C89-SIMPLIFY: call float @floorf( -; C89-SIMPLIFY: call float @ceilf( +; C89-SIMPLIFY: call float @llvm.floor.f32( +; C89-SIMPLIFY: call float @llvm.ceil.f32( ; C89-SIMPLIFY: call double @round( ; C89-SIMPLIFY: call double @nearbyint( -; DONT-SIMPLIFY: call double @floor( -; DONT-SIMPLIFY: call double @ceil( +; DONT-SIMPLIFY: call float @llvm.floor.f32( +; DONT-SIMPLIFY: call float @llvm.ceil.f32( ; DONT-SIMPLIFY: call double @round( ; DONT-SIMPLIFY: call double @nearbyint( ; DONT-SIMPLIFY: call double @trunc( Index: 
test/Transforms/InstCombine/float-shrink-compare.ll =================================================================== --- test/Transforms/InstCombine/float-shrink-compare.ll +++ test/Transforms/InstCombine/float-shrink-compare.ll @@ -3,99 +3,99 @@ target triple = "x86_64-apple-macosx10.8.0" define i32 @test1(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @ceil(double %1) nounwind readnone - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %ceil = call double @ceil(double %x.ext) nounwind readnone + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %ceil, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test1( -; CHECK-NEXT: %ceilf = call float @ceilf(float %x) -; CHECK-NEXT: fcmp oeq float %ceilf, %y +; CHECK-NEXT: %ceil = call float @llvm.ceil.f32(float %x) +; CHECK-NEXT: fcmp oeq float %ceil, %y } define i32 @test2(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @fabs(double %1) nounwind readnone - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %fabs = call double @fabs(double %x.ext) nounwind readnone + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %fabs, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x) -; CHECK-NEXT: fcmp oeq float [[FABS]], %y +; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float %fabs, %y } define i32 @test3(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @floor(double %1) nounwind readnone - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %floor = call double @floor(double 
%x.ext) nounwind readnone + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %floor, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test3( -; CHECK-NEXT: %floorf = call float @floorf(float %x) -; CHECK-NEXT: fcmp oeq float %floorf, %y +; CHECK-NEXT: %floor = call float @llvm.floor.f32(float %x) +; CHECK-NEXT: fcmp oeq float %floor, %y } define i32 @test4(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @nearbyint(double %1) nounwind - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %nearbyint = call double @nearbyint(double %x.ext) nounwind + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %nearbyint, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test4( -; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x) -; CHECK-NEXT: fcmp oeq float %nearbyintf, %y +; CHECK-NEXT: %nearbyint = call float @llvm.nearbyint.f32(float %x) +; CHECK-NEXT: fcmp oeq float %nearbyint, %y } define i32 @test5(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @rint(double %1) nounwind - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %rint = call double @rint(double %x.ext) nounwind + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %rint, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test5( -; CHECK-NEXT: %rintf = call float @rintf(float %x) -; CHECK-NEXT: fcmp oeq float %rintf, %y +; CHECK-NEXT: %rint = call float @llvm.rint.f32(float %x) +; CHECK-NEXT: fcmp oeq float %rint, %y } define i32 @test6(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @round(double %1) nounwind readnone - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 
%5 + %x.ext = fpext float %x to double + %round = call double @round(double %x.ext) nounwind readnone + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %round, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test6( -; CHECK-NEXT: %roundf = call float @roundf(float %x) -; CHECK-NEXT: fcmp oeq float %roundf, %y +; CHECK-NEXT: %round = call float @llvm.round.f32(float %x) +; CHECK-NEXT: fcmp oeq float %round, %y } define i32 @test7(float %x, float %y) nounwind uwtable { - %1 = fpext float %x to double - %2 = call double @trunc(double %1) nounwind - %3 = fpext float %y to double - %4 = fcmp oeq double %2, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %trunc = call double @trunc(double %x.ext) nounwind + %y.ext = fpext float %y to double + %cmp = fcmp oeq double %trunc, %y.ext + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test7( -; CHECK-NEXT: %truncf = call float @truncf(float %x) -; CHECK-NEXT: fcmp oeq float %truncf, %y +; CHECK-NEXT: %trunc = call float @llvm.trunc.f32(float %x) +; CHECK-NEXT: fcmp oeq float %trunc, %y } define i32 @test8(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @ceil(double %2) nounwind readnone - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %ceil = call double @ceil(double %x.ext) nounwind readnone + %cmp = fcmp oeq double %y.ext, %ceil + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test8( -; CHECK-NEXT: %ceilf = call float @ceilf(float %x) -; CHECK-NEXT: fcmp oeq float %ceilf, %y +; CHECK-NEXT: %ceil = call float @llvm.ceil.f32(float %x) +; CHECK-NEXT: fcmp oeq float %ceil, %y } define i32 @test9(float %x, float %y) nounwind uwtable { @@ -111,63 +111,63 @@ } define i32 @test10(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext 
float %x to double - %3 = call double @floor(double %2) nounwind readnone - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %floor = call double @floor(double %x.ext) nounwind readnone + %cmp = fcmp oeq double %y.ext, %floor + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test10( -; CHECK-NEXT: %floorf = call float @floorf(float %x) -; CHECK-NEXT: fcmp oeq float %floorf, %y +; CHECK-NEXT: %floor = call float @llvm.floor.f32(float %x) +; CHECK-NEXT: fcmp oeq float %floor, %y } define i32 @test11(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @nearbyint(double %2) nounwind - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %nearbyint = call double @nearbyint(double %x.ext) nounwind + %cmp = fcmp oeq double %y.ext, %nearbyint + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test11( -; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x) -; CHECK-NEXT: fcmp oeq float %nearbyintf, %y +; CHECK-NEXT: %nearbyint = call float @llvm.nearbyint.f32(float %x) +; CHECK-NEXT: fcmp oeq float %nearbyint, %y } define i32 @test12(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @rint(double %2) nounwind - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %rint = call double @rint(double %x.ext) nounwind + %cmp = fcmp oeq double %y.ext, %rint + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test12( -; CHECK-NEXT: %rintf = call float @rintf(float %x) -; CHECK-NEXT: fcmp oeq float %rintf, %y +; CHECK-NEXT: %rint = call float @llvm.rint.f32(float %x) +; CHECK-NEXT: fcmp oeq float %rint, %y } define i32 
@test13(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @round(double %2) nounwind readnone - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %round = call double @round(double %x.ext) nounwind readnone + %cmp = fcmp oeq double %y.ext, %round + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test13( -; CHECK-NEXT: %roundf = call float @roundf(float %x) -; CHECK-NEXT: fcmp oeq float %roundf, %y +; CHECK-NEXT: %round = call float @llvm.round.f32(float %x) +; CHECK-NEXT: fcmp oeq float %round, %y } define i32 @test14(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @trunc(double %2) nounwind - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %trunc = call double @trunc(double %x.ext) nounwind + %cmp = fcmp oeq double %y.ext, %trunc + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test14( -; CHECK-NEXT: %truncf = call float @truncf(float %x) -; CHECK-NEXT: fcmp oeq float %truncf, %y +; CHECK-NEXT: %trunc = call float @llvm.trunc.f32(float %x) +; CHECK-NEXT: fcmp oeq float %trunc, %y } define i32 @test15(float %x, float %y, float %z) nounwind uwtable { Index: test/Transforms/InstCombine/win-math.ll =================================================================== --- test/Transforms/InstCombine/win-math.ll +++ test/Transforms/InstCombine/win-math.ll @@ -56,15 +56,15 @@ define float @float_ceil(float %x) nounwind readnone { ; WIN32-LABEL: @float_ceil( ; WIN32-NOT: float @ceilf -; WIN32: double @ceil +; WIN32: float @llvm.ceil.f32 ; WIN64-LABEL: @float_ceil( -; WIN64: float @ceilf +; WIN64: float @llvm.ceil.f32 ; WIN64-NOT: double @ceil ; MINGW32-LABEL: @float_ceil( -; MINGW32: float @ceilf +; MINGW32: float 
@llvm.ceil.f32 ; MINGW32-NOT: double @ceil ; MINGW64-LABEL: @float_ceil( -; MINGW64: float @ceilf +; MINGW64: float @llvm.ceil.f32 ; MINGW64-NOT: double @ceil %1 = fpext float %x to double %2 = call double @ceil(double %1) @@ -137,15 +137,15 @@ define float @float_floor(float %x) nounwind readnone { ; WIN32-LABEL: @float_floor( ; WIN32-NOT: float @floorf -; WIN32: double @floor +; WIN32: float @llvm.floor.f32 ; WIN64-LABEL: @float_floor( -; WIN64: float @floorf +; WIN64: float @llvm.floor.f32 ; WIN64-NOT: double @floor ; MINGW32-LABEL: @float_floor( -; MINGW32: float @floorf +; MINGW32: float @llvm.floor.f32 ; MINGW32-NOT: double @floor ; MINGW64-LABEL: @float_floor( -; MINGW64: float @floorf +; MINGW64: float @llvm.floor.f32 ; MINGW64-NOT: double @floor %1 = fpext float %x to double %2 = call double @floor(double %1) @@ -262,10 +262,10 @@ ; WIN64-NOT: float @roundf ; WIN64: double @round ; MINGW32-LABEL: @float_round( -; MINGW32: float @roundf +; MINGW32: float @llvm.round.f32 ; MINGW32-NOT: double @round ; MINGW64-LABEL: @float_round( -; MINGW64: float @roundf +; MINGW64: float @llvm.round.f32 ; MINGW64-NOT: double @round %1 = fpext float %x to double %2 = call double @round(double %1)