Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1671,6 +1671,23 @@
 
     break;
   }
+  case Intrinsic::copysign: {
+    Value *ExtSrc0;
+    Value *ExtSrc1;
+
+    // copysign (fpext x), (fpext y) -> copysign x, y
+    if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc0))) &&
+        match(II->getArgOperand(1), m_FPExt(m_Value(ExtSrc1)))) {
+      Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
+                                           { ExtSrc0->getType() });
+      CallInst *NewCall = Builder->CreateCall(F, { ExtSrc0, ExtSrc1 });
+      NewCall->copyFastMathFlags(II);
+      NewCall->takeName(II);
+      return new FPExtInst(NewCall, II->getType());
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1416,8 +1416,26 @@
                                      OpBundles, II->getName());
       NewCI->copyFastMathFlags(II);
       return NewCI;
-    }
-    }
+    }
+    case Intrinsic::copysign: {
+      Type *Ty = CI.getType();
+      // Do binary FP operation on smaller type.
+      // (fptrunc (copysign x, y)) -> (copysign (fptrunc x), (fptrunc y))
+      Value *Trunc0 = Builder->CreateFPTrunc(II->getArgOperand(0), Ty);
+      Value *Trunc1 = Builder->CreateFPTrunc(II->getArgOperand(1), Ty);
+
+      Function *Overload = Intrinsic::getDeclaration(
+          CI.getModule(), II->getIntrinsicID(), Ty);
+
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      II->getOperandBundlesAsDefs(OpBundles);
+
+      CallInst *NewCI = CallInst::Create(Overload, { Trunc0, Trunc1 },
+                                         OpBundles, II->getName());
+      NewCI->copyFastMathFlags(II);
+      return NewCI;
+    }
+    }
   }
 
   return nullptr;
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -962,6 +962,21 @@
   return NewCall;
 }
 
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceBinaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+  // Propagate fast-math flags from the existing call to the new call.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  B.setFastMathFlags(CI->getFastMathFlags());
+
+  Module *M = CI->getModule();
+  Value *V0 = CI->getArgOperand(0);
+  Value *V1 = CI->getArgOperand(1);
+  Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+  CallInst *NewCall = B.CreateCall(F, { V0, V1 });
+  NewCall->takeName(CI);
+  return NewCall;
+}
+
 /// Shrink double -> float for binary functions like 'fmin/fmax'.
 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
@@ -2171,9 +2186,7 @@
       return optimizeUnaryDoubleFP(CI, Builder, true);
     return nullptr;
   case LibFunc::copysign:
-    if (hasFloatVersion(FuncName))
-      return optimizeBinaryDoubleFP(CI, Builder);
-    return nullptr;
+    return replaceBinaryCall(CI, Builder, Intrinsic::copysign);
   case LibFunc::fminf:
   case LibFunc::fmin:
   case LibFunc::fminl:
Index: test/Transforms/InstCombine/copysign.ll
===================================================================
--- test/Transforms/InstCombine/copysign.ll
+++ test/Transforms/InstCombine/copysign.ll
@@ -45,5 +45,81 @@
   ret double %x
 }
 
+; CHECK-LABEL: @reduce_precision(
+; CHECK: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_fmf(
+; CHECK: %copysign = call nnan float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_fmf(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call nnan double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_constant(
+; CHECK: %trunc = call float @llvm.copysign.f32(float %x, float 4.000000e+00)
+; CHECK-NEXT: ret float %trunc
+define float @reduce_precision_constant(float %x) {
+  %x.ext = fpext float %x to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double 4.0)
+  %trunc = fptrunc double %copysign to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_multi_use_src0(
+; CHECK: %x.ext = fpext float %x to double
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: store volatile double %x.ext,
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_multi_use_src0(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  store volatile double %x.ext, double* undef
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_multi_use_src1(
+; CHECK: %y.ext = fpext float %y to double
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: store volatile double %y.ext
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_multi_use_src1(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  store volatile double %y.ext, double* undef
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_multi_use_src0_src1(
+; CHECK-NEXT: %x.ext = fpext float %x to double
+; CHECK-NEXT: %y.ext = fpext float %y to double
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: store volatile double %x.ext,
+; CHECK-NEXT: store volatile double %y.ext,
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_multi_use_src0_src1(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  store volatile double %x.ext, double* undef
+  store volatile double %y.ext, double* undef
+  ret float %trunc
+}
 attributes #0 = { nounwind readnone }
Index: test/Transforms/InstCombine/float-shrink-compare.ll
===================================================================
--- test/Transforms/InstCombine/float-shrink-compare.ll
+++ test/Transforms/InstCombine/float-shrink-compare.ll
@@ -223,16 +223,16 @@
 }
 
 define i32 @test19(float %x, float %y, float %z) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = fpext float %y to double
-  %3 = call double @copysign(double %1, double %2) nounwind
-  %4 = fpext float %z to double
-  %5 = fcmp oeq double %3, %4
-  %6 = zext i1 %5 to i32
-  ret i32 %6
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @copysign(double %x.ext, double %y.ext) nounwind
+  %z.ext = fpext float %z to double
+  %cmp = fcmp oeq double %copysign, %z.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test19(
-; CHECK-NEXT: %copysignf = call float @copysignf(float %x, float %y)
-; CHECK-NEXT: fcmp oeq float %copysignf, %z
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %copysign, %z
 }
 
 define i32 @test20(float %x, float %y) nounwind uwtable {