Index: llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -92,6 +92,7 @@
   Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
   Value *optimizePow(CallInst *CI, IRBuilder<> &B);
   Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeFabs(CallInst *CI, IRBuilder<> &B);
   Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
 
   // Integer Library Call Optimizations
Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1230,6 +1230,30 @@
   return Ret;
 }
 
+Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
+  Function *Callee = CI->getCalledFunction();
+
+  Value *Ret = nullptr;
+  if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) {
+    Ret = optimizeUnaryDoubleFP(CI, B, false);
+  }
+
+  FunctionType *FT = Callee->getFunctionType();
+  // Make sure this has 1 argument of FP type which matches the result type.
+  if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+      !FT->getParamType(0)->isFloatingPointTy())
+    return Ret;
+
+  Value *Op = CI->getArgOperand(0);
+  if (Instruction *I = dyn_cast<Instruction>(Op)) {
+    // Fold fabs(x * x) -> x * x; a squared FP value can never be negative.
+    if (I->getOpcode() == Instruction::FMul)
+      if (I->getOperand(0) == I->getOperand(1))
+        return Op;
+  }
+  return Ret;
+}
+
 static bool isTrigLibCall(CallInst *CI);
 static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
                              bool UseFloat, Value *&Sin, Value *&Cos,
@@ -1893,6 +1917,8 @@
       return optimizePow(CI, Builder);
     case Intrinsic::exp2:
       return optimizeExp2(CI, Builder);
+    case Intrinsic::fabs:
+      return optimizeFabs(CI, Builder);
     default:
       return nullptr;
     }
@@ -1965,6 +1991,10 @@
     case LibFunc::exp2:
     case LibFunc::exp2f:
       return optimizeExp2(CI, Builder);
+    case LibFunc::fabsf:
+    case LibFunc::fabs:
+    case LibFunc::fabsl:
+      return optimizeFabs(CI, Builder);
     case LibFunc::ffs:
     case LibFunc::ffsl:
     case LibFunc::ffsll:
@@ -1999,7 +2029,6 @@
     case LibFunc::fputc:
       return optimizeErrorReporting(CI, Builder, 1);
     case LibFunc::ceil:
-    case LibFunc::fabs:
     case LibFunc::floor:
     case LibFunc::rint:
     case LibFunc::round:
Index: llvm/trunk/test/Transforms/InstCombine/fabs.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/fabs.ll
+++ llvm/trunk/test/Transforms/InstCombine/fabs.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Make sure all library calls are eliminated when the input is known positive.
+
+declare float @fabsf(float)
+declare double @fabs(double)
+declare fp128 @fabsl(fp128)
+
+define float @square_fabs_call_f32(float %x) {
+  %mul = fmul float %x, %x
+  %fabsf = tail call float @fabsf(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_fabs_call_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_call_f64(double %x) {
+  %mul = fmul double %x, %x
+  %fabs = tail call double @fabs(double %mul)
+  ret double %fabs
+
+; CHECK-LABEL: square_fabs_call_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_call_f128(fp128 %x) {
+  %mul = fmul fp128 %x, %x
+  %fabsl = tail call fp128 @fabsl(fp128 %mul)
+  ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_call_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Make sure all intrinsic calls are eliminated when the input is known positive.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+define float @square_fabs_intrinsic_f32(float %x) {
+  %mul = fmul float %x, %x
+  %fabsf = tail call float @llvm.fabs.f32(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_fabs_intrinsic_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_intrinsic_f64(double %x) {
+  %mul = fmul double %x, %x
+  %fabs = tail call double @llvm.fabs.f64(double %mul)
+  ret double %fabs
+
+; CHECK-LABEL: square_fabs_intrinsic_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
+  %mul = fmul fp128 %x, %x
+  %fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
+  ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_intrinsic_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
+
+define float @square_fabs_shrink_call1(float %x) {
+  %ext = fpext float %x to double
+  %sq = fmul double %ext, %ext
+  %fabs = call double @fabs(double %sq)
+  %trunc = fptrunc double %fabs to float
+  ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call1(
+; CHECK-NEXT: %trunc = fmul float %x, %x
+; CHECK-NEXT: ret float %trunc
+}
+
+define float @square_fabs_shrink_call2(float %x) {
+  %sq = fmul float %x, %x
+  %ext = fpext float %sq to double
+  %fabs = call double @fabs(double %ext)
+  %trunc = fptrunc double %fabs to float
+  ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call2(
+; CHECK-NEXT: %sq = fmul float %x, %x
+; CHECK-NEXT: ret float %sq
+}
+