Index: include/llvm/IR/PatternMatch.h
===================================================================
--- include/llvm/IR/PatternMatch.h
+++ include/llvm/IR/PatternMatch.h
@@ -826,6 +826,18 @@
   return CastClass_match<OpTy, Instruction::SIToFP>(Op);
 }
 
+/// \brief Matches FPTrunc
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
+}
+
+/// \brief Matches FPExt
+template <typename OpTy>
+inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
+  return CastClass_match<OpTy, Instruction::FPExt>(Op);
+}
+
 //===----------------------------------------------------------------------===//
 // Matchers for unary operators
 //
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1635,6 +1635,19 @@
       return SelectInst::Create(Cond, Call0, Call1);
     }
 
+    Value *ExtSrc;
+    if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
+        II->getArgOperand(0)->hasOneUse()) {
+      // fabs (fpext x) -> fpext (fabs x)
+      Value *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::fabs,
+                                           { ExtSrc->getType() });
+      CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
+      // Carry the original call's fast-math flags over to the narrowed fabs.
+      NewFabs->copyFastMathFlags(II);
+      NewFabs->takeName(II);
+      return new FPExtInst(NewFabs, II->getType());
+    }
+
     break;
   }
   case Intrinsic::cos:
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1392,21 +1392,24 @@
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
   if (II) {
     switch (II->getIntrinsicID()) {
-      default: break;
-      case Intrinsic::fabs: {
-        // (fptrunc (fabs x)) -> (fabs (fptrunc x))
-        Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
-                                                   CI.getType());
-        Type *IntrinsicType[] = { CI.getType() };
-        Function *Overload = Intrinsic::getDeclaration(
-            CI.getModule(), II->getIntrinsicID(), IntrinsicType);
-
-        SmallVector<OperandBundleDef, 1> OpBundles;
-        II->getOperandBundlesAsDefs(OpBundles);
-
-        Value *Args[] = { InnerTrunc };
-        return CallInst::Create(Overload, Args, OpBundles, II->getName());
-      }
+    default: break;
+    case Intrinsic::fabs: {
+      // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+      Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+                                                 CI.getType());
+      Type *IntrinsicType[] = { CI.getType() };
+      Function *Overload = Intrinsic::getDeclaration(
+          CI.getModule(), II->getIntrinsicID(), IntrinsicType);
+
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      II->getOperandBundlesAsDefs(OpBundles);
+
+      Value *Args[] = { InnerTrunc };
+      CallInst *NewCI = CallInst::Create(Overload, Args,
+                                         OpBundles, II->getName());
+      NewCI->copyFastMathFlags(II);
+      return NewCI;
+    }
     }
   }
 
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1189,11 +1189,13 @@
 
 Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
-  if (Name == "fabs" && hasFloatVersion(Name))
-    return optimizeUnaryDoubleFP(CI, B, false);
 
-  return nullptr;
+  // fabs/fabsf -> llvm.fabs.*
+  Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::fabs,
+                                       CI->getType());
+  Value *NewCall = B.CreateCall(F, { CI->getArgOperand(0) });
+  NewCall->takeName(CI);
+  return NewCall;
 }
 
 Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
@@ -2008,8 +2010,6 @@
       return optimizePow(CI, Builder);
     case Intrinsic::exp2:
       return optimizeExp2(CI, Builder);
-    case Intrinsic::fabs:
-      return optimizeFabs(CI, Builder);
     case Intrinsic::log:
       return optimizeLog(CI, Builder);
     case Intrinsic::sqrt:
Index: test/Transforms/InstCombine/double-float-shrink-2.ll
===================================================================
--- test/Transforms/InstCombine/double-float-shrink-2.ll
+++ test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -10,7 +10,8 @@
 ; DO-SIMPLIFY: call float @roundf(
 ; DO-SIMPLIFY: call float @nearbyintf(
 ; DO-SIMPLIFY: call float @truncf(
-; DO-SIMPLIFY: call float @fabsf(
+; DO-SIMPLIFY: call float @llvm.fabs.f32(
+; DO-SIMPLIFY: call fast float @llvm.fabs.f32(
 
 ; C89-SIMPLIFY: call float @floorf(
 ; C89-SIMPLIFY: call float @ceilf(
@@ -22,7 +23,10 @@
 ; DONT-SIMPLIFY: call double @round(
 ; DONT-SIMPLIFY: call double @nearbyint(
 ; DONT-SIMPLIFY: call double @trunc(
-; DONT-SIMPLIFY: call double @fabs(
+
+; This is replaced with the intrinsic, which does the right thing on
+; all platforms.
+; DONT-SIMPLIFY: call float @llvm.fabs.f32(
 
 declare double @floor(double)
 declare double @ceil(double)
@@ -30,6 +34,7 @@
 declare double @nearbyint(double)
 declare double @trunc(double)
 declare double @fabs(double)
+declare double @llvm.fabs.f64(double)
 
 define float @test_floor(float %C) {
   %D = fpext float %C to double
@@ -78,3 +83,12 @@
   %F = fptrunc double %E to float
   ret float %F
 }
+
+; Make sure fast math flags are preserved
+define float @test_fabs_fast(float %C) {
+  %D = fpext float %C to double
+  ; --> fabsf
+  %E = call fast double @fabs(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
Index: test/Transforms/InstCombine/fabs.ll
===================================================================
--- test/Transforms/InstCombine/fabs.ll
+++ test/Transforms/InstCombine/fabs.ll
@@ -1,51 +1,45 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-; Make sure all library calls are eliminated when the input is known positive.
+; Make sure libcalls are replaced with intrinsic calls.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
 
 declare float @fabsf(float)
 declare double @fabs(double)
 declare fp128 @fabsl(fp128)
 
-define float @square_fabs_call_f32(float %x) {
-  %mul = fmul float %x, %x
-  %fabsf = tail call float @fabsf(float %mul)
+define float @replace_fabs_call_f32(float %x) {
+  %fabsf = tail call float @fabsf(float %x)
   ret float %fabsf
 
-; CHECK-LABEL: square_fabs_call_f32(
-; CHECK-NEXT: %mul = fmul float %x, %x
-; CHECK-NEXT: %fabsf = tail call float @fabsf(float %mul)
+; CHECK-LABEL: @replace_fabs_call_f32(
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %x)
 ; CHECK-NEXT: ret float %fabsf
 }
 
-define double @square_fabs_call_f64(double %x) {
-  %mul = fmul double %x, %x
-  %fabs = tail call double @fabs(double %mul)
+define double @replace_fabs_call_f64(double %x) {
+  %fabs = tail call double @fabs(double %x)
   ret double %fabs
 
-; CHECK-LABEL: square_fabs_call_f64(
-; CHECK-NEXT: %mul = fmul double %x, %x
-; CHECK-NEXT: %fabs = tail call double @fabs(double %mul)
+; CHECK-LABEL: @replace_fabs_call_f64(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
 ; CHECK-NEXT: ret double %fabs
 }
 
-define fp128 @square_fabs_call_f128(fp128 %x) {
-  %mul = fmul fp128 %x, %x
-  %fabsl = tail call fp128 @fabsl(fp128 %mul)
+define fp128 @replace_fabs_call_f128(fp128 %x) {
+  %fabsl = tail call fp128 @fabsl(fp128 %x)
   ret fp128 %fabsl
 
-; CHECK-LABEL: square_fabs_call_f128(
-; CHECK-NEXT: %mul = fmul fp128 %x, %x
-; CHECK-NEXT: %fabsl = tail call fp128 @fabsl(fp128 %mul)
+; CHECK-LABEL: @replace_fabs_call_f128(
+; CHECK-NEXT: %fabsl = call fp128 @llvm.fabs.f128(fp128 %x)
 ; CHECK-NEXT: ret fp128 %fabsl
 }
 
 ; Make sure all intrinsic calls are eliminated when the input is known
 ; positive.
 
-declare float @llvm.fabs.f32(float)
-declare double @llvm.fabs.f64(double)
-declare fp128 @llvm.fabs.f128(fp128)
-
 ; The fabs cannot be eliminated because %x may be a NaN
 define float @square_fabs_intrinsic_f32(float %x) {
   %mul = fmul float %x, %x
@@ -102,10 +96,8 @@
   ret float %trunc
 
 ; CHECK-LABEL: square_fabs_shrink_call1(
-; CHECK-NEXT: %ext = fpext float %x to double
-; CHECK-NEXT: %sq = fmul double %ext, %ext
-; CHECK-NEXT: call double @fabs(double %sq)
-; CHECK-NEXT: %trunc = fptrunc double %fabs to float
+; CHECK-NEXT: fmul float %x, %x
+; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float
 ; CHECK-NEXT: ret float %trunc
 }
 
@@ -118,8 +110,8 @@
 
 ; CHECK-LABEL: square_fabs_shrink_call2(
 ; CHECK-NEXT: %sq = fmul float %x, %x
-; CHECK-NEXT: %fabsf = call float @fabsf(float %sq)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: %trunc = call float @llvm.fabs.f32(float %sq)
+; CHECK-NEXT: ret float %trunc
 }
 
 ; CHECK-LABEL: @fabs_select_constant_negative_positive(
@@ -170,3 +162,16 @@
   %fabs = call float @llvm.fabs.f32(float %select)
   ret float %fabs
 }
+
+; Don't introduce a second fpext
+; CHECK-LABEL: @multi_use_fabs_fpext(
+; CHECK: %fpext = fpext float %x to double
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %fpext)
+; CHECK-NEXT: store volatile double %fpext, double* undef, align 8
+; CHECK-NEXT: ret double %fabs
+define double @multi_use_fabs_fpext(float %x) {
+  %fpext = fpext float %x to double
+  %fabs = call double @llvm.fabs.f64(double %fpext)
+  store volatile double %fpext, double* undef
+  ret double %fabs
+}
Index: test/Transforms/InstCombine/float-shrink-compare.ll
===================================================================
--- test/Transforms/InstCombine/float-shrink-compare.ll
+++ test/Transforms/InstCombine/float-shrink-compare.ll
@@ -22,8 +22,8 @@
   %5 = zext i1 %4 to i32
   ret i32 %5
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
+; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float [[FABS]], %y
 }
 
 define i32 @test3(float %x, float %y) nounwind uwtable {
@@ -106,8 +106,8 @@
   %5 = zext i1 %4 to i32
   ret i32 %5
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
-; CHECK-NEXT: fcmp oeq float %fabsf, %y
+; CHECK-NEXT: [[FABS:%[0-9]+]] = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: fcmp oeq float [[FABS]], %y
 }
 
 define i32 @test10(float %x, float %y) nounwind uwtable {
Index: test/Transforms/InstCombine/pow-1.ll
===================================================================
--- test/Transforms/InstCombine/pow-1.ll
+++ test/Transforms/InstCombine/pow-1.ll
@@ -72,7 +72,7 @@
 ; CHECK-LABEL: @test_simplify7(
   %retval = call float @powf(float %x, float 0.5)
 ; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO:#[0-9]+]]
-; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @llvm.fabs.f32(float [[SQRTF]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
   ret float %retval
@@ -83,7 +83,7 @@
 ; CHECK-LABEL: @test_simplify8(
   %retval = call double @pow(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
@@ -163,7 +163,7 @@
 ; CHECK-LABEL: @test_simplify17(
   %retval = call double @llvm.pow.f64(double %x, double 0.5)
 ; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x)
-; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]])
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @llvm.fabs.f64(double [[SQRT]])
 ; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
 ; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
   ret double %retval
Index: test/Transforms/InstCombine/win-math.ll
===================================================================
--- test/Transforms/InstCombine/win-math.ll
+++ test/Transforms/InstCombine/win-math.ll
@@ -284,11 +284,11 @@
 ; WIN64: float @powf
 ; MINGW32-LABEL: @float_powsqrt(
 ; MINGW32: float @sqrtf
-; MINGW32: float @fabsf
+; MINGW32: float @llvm.fabs.f32(
 ; MINGW32-NOT: float @powf
 ; MINGW64-LABEL: @float_powsqrt(
 ; MINGW64: float @sqrtf
-; MINGW64: float @fabsf
+; MINGW64: float @llvm.fabs.f32(
 ; MINGW64-NOT: float @powf
   %1 = call float @powf(float %x, float 0.5)
   ret float %1
Index: test/Transforms/InstCombine/zero-point-zero-add.ll
===================================================================
--- test/Transforms/InstCombine/zero-point-zero-add.ll
+++ test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -15,7 +15,7 @@
 
 define double @test1(double %X) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[Y:%.*]] = call double @fabs(double %X)
+; CHECK-NEXT: [[Y:%.*]] = call double @llvm.fabs.f64(double %X)
 ; CHECK-NEXT: ret double [[Y]]
 ;
   %Y = call double @fabs(double %X)