Index: include/llvm/IR/PatternMatch.h =================================================================== --- include/llvm/IR/PatternMatch.h +++ include/llvm/IR/PatternMatch.h @@ -826,6 +826,18 @@ return CastClass_match(Op); } +/// \brief Matches FPTrunc +template <typename OpTy> +inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { + return CastClass_match<OpTy, Instruction::FPTrunc>(Op); +} + +/// \brief Matches FPExt +template <typename OpTy> +inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { + return CastClass_match<OpTy, Instruction::FPExt>(Op); +} + //===----------------------------------------------------------------------===// // Matchers for unary operators // Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1631,6 +1631,18 @@ return SelectInst::Create(Cond, Call0, Call1); } + Value *ExtSrc; + if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) && + II->getArgOperand(0)->hasOneUse()) { + // fabs (fpext x) -> fpext (fabs x) + Value *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::fabs, + { ExtSrc->getType() }); + CallInst *NewFabs = Builder->CreateCall(F, ExtSrc); + NewFabs->copyFastMathFlags(II); + NewFabs->takeName(II); + return new FPExtInst(NewFabs, II->getType()); + } + break; } case Intrinsic::cos: Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1392,21 +1392,24 @@ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0)); if (II) { switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::fabs: { - // (fptrunc (fabs x)) -> (fabs (fptrunc x)) - Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), - CI.getType()); - Type *IntrinsicType[] = { CI.getType() }; - Function *Overload = Intrinsic::getDeclaration( - CI.getModule(), II->getIntrinsicID(), 
IntrinsicType); - - SmallVector<OperandBundleDef, 1> OpBundles; - II->getOperandBundlesAsDefs(OpBundles); - - Value *Args[] = { InnerTrunc }; - return CallInst::Create(Overload, Args, OpBundles, II->getName()); - } + default: break; + case Intrinsic::fabs: { + // (fptrunc (fabs x)) -> (fabs (fptrunc x)) + Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), + CI.getType()); + Type *IntrinsicType[] = { CI.getType() }; + Function *Overload = Intrinsic::getDeclaration( + CI.getModule(), II->getIntrinsicID(), IntrinsicType); + + SmallVector<OperandBundleDef, 1> OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + + Value *Args[] = { InnerTrunc }; + CallInst *NewCI = CallInst::Create(Overload, Args, + OpBundles, II->getName()); + NewCI->copyFastMathFlags(II); + return NewCI; + } } } Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1210,11 +1210,15 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - StringRef Name = Callee->getName(); - if (Name == "fabs" && hasFloatVersion(Name)) - return optimizeUnaryDoubleFP(CI, B, false); + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); - return nullptr; + // fabs/fabsf -> llvm.fabs.* + Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::fabs, + CI->getType()); + Value *NewCall = B.CreateCall(F, { CI->getArgOperand(0) }); + NewCall->takeName(CI); + return NewCall; } Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { @@ -2029,8 +2033,6 @@ return optimizePow(CI, Builder); case Intrinsic::exp2: return optimizeExp2(CI, Builder); - case Intrinsic::fabs: - return optimizeFabs(CI, Builder); case Intrinsic::log: return optimizeLog(CI, Builder); case Intrinsic::sqrt: Index: test/Transforms/InstCombine/double-float-shrink-2.ll 
=================================================================== --- test/Transforms/InstCombine/double-float-shrink-2.ll +++ test/Transforms/InstCombine/double-float-shrink-2.ll @@ -10,7 +10,8 @@ ; DO-SIMPLIFY: call float @roundf( ; DO-SIMPLIFY: call float @nearbyintf( ; DO-SIMPLIFY: call float @truncf( -; DO-SIMPLIFY: call float @fabsf( +; DO-SIMPLIFY: call float @llvm.fabs.f32( +; DO-SIMPLIFY: call float fast @llvm.fabs.f32( ; C89-SIMPLIFY: call float @floorf( ; C89-SIMPLIFY: call float @ceilf( @@ -22,7 +23,10 @@ ; DONT-SIMPLIFY: call double @round( ; DONT-SIMPLIFY: call double @nearbyint( ; DONT-SIMPLIFY: call double @trunc( -; DONT-SIMPLIFY: call double @fabs( + +; This is replaced with the intrinsic, which does the right thing on +; all platforms. +; DONT-SIMPLIFY: call float @llvm.fabs.f32( declare double @floor(double) declare double @ceil(double) @@ -30,6 +34,7 @@ declare double @nearbyint(double) declare double @trunc(double) declare double @fabs(double) +declare double @llvm.fabs.f64(double) define float @test_floor(float %C) { %D = fpext float %C to double @@ -78,3 +83,12 @@ %F = fptrunc double %E to float ret float %F } + +; Make sure fast math flags are preserved +define float @test_fabs_fast(float %C) { + %D = fpext float %C to double + ; --> fabsf + %E = call fast double @fabs(double %D) + %F = fptrunc double %E to float + ret float %F +} Index: test/Transforms/InstCombine/fabs-libcall.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/fabs-libcall.ll @@ -0,0 +1,21 @@ +; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | FileCheck %s + +declare x86_fp80 @fabsl(x86_fp80) + +; CHECK-LABEL: @replace_fabs_call_f80( +; CHECK-NEXT: %fabsl = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x) +; CHECK-NEXT: ret x86_fp80 %fabsl +define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { + %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x) + ret x86_fp80 %fabsl + +} + +; CHECK-LABEL: 
@fmf_replace_fabs_call_f80( +; CHECK-NEXT: %fabsl = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x) +; CHECK-NEXT: ret x86_fp80 %fabsl +define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) { + %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x) + ret x86_fp80 %fabsl +} + Index: test/Transforms/InstCombine/fabs.ll =================================================================== --- test/Transforms/InstCombine/fabs.ll +++ test/Transforms/InstCombine/fabs.ll @@ -1,6 +1,10 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -instcombine -S | FileCheck %s -; Make sure all library calls are eliminated when the input is known positive. +; Make sure libcalls are replaced with intrinsic calls. + +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare fp128 @llvm.fabs.f128(fp128) declare float @fabsf(float) declare double @fabs(double) @@ -8,46 +12,46 @@ declare float @llvm.fma.f32(float, float, float) declare float @llvm.fmuladd.f32(float, float, float) -define float @square_fabs_call_f32(float %x) { - %mul = fmul float %x, %x - %fabsf = tail call float @fabsf(float %mul) +define float @replace_fabs_call_f32(float %x) { + %fabsf = tail call float @fabsf(float %x) ret float %fabsf -; CHECK-LABEL: square_fabs_call_f32( -; CHECK-NEXT: %mul = fmul float %x, %x -; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul) +; CHECK-LABEL: @replace_fabs_call_f32( +; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %x) ; CHECK-NEXT: ret float %fabsf } -define double @square_fabs_call_f64(double %x) { - %mul = fmul double %x, %x - %fabs = tail call double @fabs(double %mul) +define double @replace_fabs_call_f64(double %x) { + %fabs = tail call double @fabs(double %x) ret double %fabs -; CHECK-LABEL: square_fabs_call_f64( -; CHECK-NEXT: %mul = fmul double %x, %x -; CHECK-NEXT: %fabs = tail call double @fabs(double %mul) +; CHECK-LABEL: @replace_fabs_call_f64( +; CHECK-NEXT: %fabs = call double 
@llvm.fabs.f64(double %x) ; CHECK-NEXT: ret double %fabs } -define fp128 @square_fabs_call_f128(fp128 %x) { - %mul = fmul fp128 %x, %x - %fabsl = tail call fp128 @fabsl(fp128 %mul) +define fp128 @replace_fabs_call_f128(fp128 %x) { + %fabsl = tail call fp128 @fabsl(fp128 %x) ret fp128 %fabsl -; CHECK-LABEL: square_fabs_call_f128( -; CHECK-NEXT: %mul = fmul fp128 %x, %x -; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul) +; CHECK-LABEL: replace_fabs_call_f128( +; CHECK-NEXT: %fabsl = call fp128 @llvm.fabs.f128(fp128 %x) ; CHECK-NEXT: ret fp128 %fabsl } +; Make sure fast math flags are preserved when replacing the libcall. +define float @fmf_replace_fabs_call_f32(float %x) { + %fabsf = tail call nnan float @fabsf(float %x) + ret float %fabsf + +; CHECK-LABEL: @fmf_replace_fabs_call_f32( +; CHECK-NEXT: %fabsf = call nnan float @llvm.fabs.f32(float %x) +; CHECK-NEXT: ret float %fabsf +} + ; Make sure all intrinsic calls are eliminated when the input is known ; positive. -declare float @llvm.fabs.f32(float) -declare double @llvm.fabs.f64(double) -declare fp128 @llvm.fabs.f128(fp128) - ; The fabs cannot be eliminated because %x may be a NaN define float @square_fabs_intrinsic_f32(float %x) { %mul = fmul float %x, %x @@ -102,10 +106,8 @@ ret float %trunc ; CHECK-LABEL: square_fabs_shrink_call1( -; CHECK-NEXT: %ext = fpext float %x to double -; CHECK-NEXT: %sq = fmul double %ext, %ext -; CHECK-NEXT: call double @fabs(double %sq) -; CHECK-NEXT: %trunc = fptrunc double %fabs to float +; CHECK-NEXT: fmul float %x, %x +; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float ; CHECK-NEXT: ret float %trunc } @@ -118,8 +120,8 @@ ; CHECK-LABEL: square_fabs_shrink_call2( ; CHECK-NEXT: %sq = fmul float %x, %x -; CHECK-NEXT: %fabsf = call float @fabsf(float %sq) -; CHECK-NEXT: ret float %fabsf +; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float %sq) +; CHECK-NEXT: ret float %trunc } ; CHECK-LABEL: @fabs_select_constant_negative_positive( @@ -214,3 +216,16 @@ ; CHECK-NEXT: 
%fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00) ; CHECK-NEXT: ret float %fmuladd } + +; Don't introduce a second fpext +; CHECK-LABEL: @multi_use_fabs_fpext( +; CHECK: %fpext = fpext float %x to double +; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %fpext) +; CHECK-NEXT: store volatile double %fpext, double* undef, align 8 +; CHECK-NEXT: ret double %fabs +define double @multi_use_fabs_fpext(float %x) { + %fpext = fpext float %x to double + %fabs = call double @llvm.fabs.f64(double %fpext) + store volatile double %fpext, double* undef + ret double %fabs +} Index: test/Transforms/InstCombine/float-shrink-compare.ll =================================================================== --- test/Transforms/InstCombine/float-shrink-compare.ll +++ test/Transforms/InstCombine/float-shrink-compare.ll @@ -22,8 +22,20 @@ %5 = zext i1 %4 to i32 ret i32 %5 ; CHECK-LABEL: @test2( -; CHECK-NEXT: %fabsf = call float @fabsf(float %x) -; CHECK-NEXT: fcmp oeq float %fabsf, %y +; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float [[FABS]], %y +} + +define i32 @fmf_test2(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call nnan double @fabs(double %1) nounwind readnone + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK-LABEL: @fmf_test2( +; CHECK-NEXT: [[FABS:%[0-9]+]] = call nnan float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float [[FABS]], %y } define i32 @test3(float %x, float %y) nounwind uwtable { @@ -99,15 +111,15 @@ } define i32 @test9(float %x, float %y) nounwind uwtable { - %1 = fpext float %y to double - %2 = fpext float %x to double - %3 = call double @fabs(double %2) nounwind readnone - %4 = fcmp oeq double %1, %3 - %5 = zext i1 %4 to i32 - ret i32 %5 + %x.ext = fpext float %x to double + %y.ext = fpext float %y to double + %fabs = call double @fabs(double %x.ext) nounwind readnone + %cmp 
= fcmp oeq double %y.ext, %fabs + %cmp.ext = zext i1 %cmp to i32 + ret i32 %cmp.ext ; CHECK-LABEL: @test9( -; CHECK-NEXT: %fabsf = call float @fabsf(float %x) -; CHECK-NEXT: fcmp oeq float %fabsf, %y +; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x) +; CHECK-NEXT: fcmp oeq float %fabs, %y } define i32 @test10(float %x, float %y) nounwind uwtable { Index: test/Transforms/InstCombine/pow-1.ll =================================================================== --- test/Transforms/InstCombine/pow-1.ll +++ test/Transforms/InstCombine/pow-1.ll @@ -72,7 +72,7 @@ ; CHECK-LABEL: @test_simplify7( %retval = call float @powf(float %x, float 0.5) ; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO:#[0-9]+]] -; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]] +; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @llvm.fabs.f32(float [[SQRTF]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]] ret float %retval @@ -83,7 +83,7 @@ ; CHECK-LABEL: @test_simplify8( %retval = call double @pow(double %x, double 0.5) ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]] -; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]] +; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]] ret double %retval @@ -163,7 +163,7 @@ ; CHECK-LABEL: @test_simplify17( %retval = call double @llvm.pow.f64(double %x, double 0.5) ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) -; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) +; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]]) ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp 
oeq double %x, 0xFFF0000000000000 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]] ret double %retval Index: test/Transforms/InstCombine/win-math.ll =================================================================== --- test/Transforms/InstCombine/win-math.ll +++ test/Transforms/InstCombine/win-math.ll @@ -284,11 +284,11 @@ ; WIN64: float @powf ; MINGW32-LABEL: @float_powsqrt( ; MINGW32: float @sqrtf -; MINGW32: float @fabsf +; MINGW32: float @llvm.fabs.f32 ; MINGW32-NOT: float @powf ; MINGW64-LABEL: @float_powsqrt( ; MINGW64: float @sqrtf -; MINGW64: float @fabsf +; MINGW64: float @llvm.fabs.f32( ; MINGW64-NOT: float @powf %1 = call float @powf(float %x, float 0.5) ret float %1 Index: test/Transforms/InstCombine/zero-point-zero-add.ll =================================================================== --- test/Transforms/InstCombine/zero-point-zero-add.ll +++ test/Transforms/InstCombine/zero-point-zero-add.ll @@ -15,7 +15,7 @@ define double @test1(double %X) { ; CHECK-LABEL: @test1( -; CHECK-NEXT: [[Y:%.*]] = call double @fabs(double %X) +; CHECK-NEXT: [[Y:%.*]] = call double @llvm.fabs.f64(double %X) ; CHECK-NEXT: ret double [[Y]] ; %Y = call double @fabs(double %X)