Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -68,12 +68,6 @@
 
   /* Specialized optimizations */
 
-  // recip (half or native)
-  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
-  // divide (half or native)
-  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
-
   // pow/powr/pown
   bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
 
@@ -655,19 +649,6 @@
   case AMDGPULibFunc::EI_COS:
   case AMDGPULibFunc::EI_SIN:
     return fold_sincos(FPOp, B, FInfo, AA);
-  case AMDGPULibFunc::EI_RECIP:
-    // skip vector function
-    assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
-             FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
-            "recip must be an either native or half function");
-    return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
-
-  case AMDGPULibFunc::EI_DIVIDE:
-    // skip vector function
-    assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
-             FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
-            "divide must be an either native or half function");
-    return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
   case AMDGPULibFunc::EI_FMA:
   case AMDGPULibFunc::EI_MAD:
   case AMDGPULibFunc::EI_NFMA:
@@ -755,45 +736,6 @@
   return false;
 }
 
-// [native_]half_recip(c) ==> 1.0/c
-bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
-                                const FuncInfo &FInfo) {
-  Value *opr0 = CI->getArgOperand(0);
-  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
-    // Just create a normal div. Later, InstCombine will be able
-    // to compute the divide into a constant (avoid check float infinity
-    // or subnormal at this point).
-    Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
-                               opr0,
-                               "recip2div");
-    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
-    replaceCall(nval);
-    return true;
-  }
-  return false;
-}
-
-// [native_]half_divide(x, c) ==> x/c
-bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
-                                 const FuncInfo &FInfo) {
-  Value *opr0 = CI->getArgOperand(0);
-  Value *opr1 = CI->getArgOperand(1);
-  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
-  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
-
-  if ((CF0 && CF1) || // both are constants
-      (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
-      // CF1 is constant && f32 divide
-  {
-    Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
-                                opr1, "__div2recip");
-    Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
-    replaceCall(nval);
-    return true;
-  }
-  return false;
-}
-
 namespace llvm {
 static double log2(double V) {
 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -156,8 +156,10 @@
 
 declare float @_Z10half_recipf(float)
 
+; Do nothing, the underlying implementation will optimize correctly
+; after inlining.
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
-; GCN: fmul fast float %tmp, 0x3FD5555560000000
+; GCN: %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
 define amdgpu_kernel void @test_native_divide(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load float, ptr addrspace(1) %a, align 4
@@ -168,8 +170,10 @@
 
 declare float @_Z13native_divideff(float, float)
 
+; Do nothing, the optimization will naturally happen after inlining.
+
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
-; GCN: fmul fast float %tmp, 0x3FD5555560000000
+; GCN: %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
 define amdgpu_kernel void @test_half_divide(ptr addrspace(1) nocapture %a) {
 entry:
   %tmp = load float, ptr addrspace(1) %a, align 4