Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -75,9 +75,6 @@ // rootn bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); - // fma/mad - bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - // -fuse-native for sincos bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); @@ -649,11 +646,6 @@ case AMDGPULibFunc::EI_COS: case AMDGPULibFunc::EI_SIN: return fold_sincos(FPOp, B, FInfo, AA); - case AMDGPULibFunc::EI_FMA: - case AMDGPULibFunc::EI_MAD: - case AMDGPULibFunc::EI_NFMA: - // skip vector function - return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo); default: break; } @@ -1087,50 +1079,6 @@ return false; } -bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { - Value *opr0 = CI->getArgOperand(0); - Value *opr1 = CI->getArgOperand(1); - Value *opr2 = CI->getArgOperand(2); - - ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); - ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); - if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) { - // fma/mad(a, b, c) = c if a=0 || b=0 - LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n"); - replaceCall(opr2); - return true; - } - if (CF0 && CF0->isExactlyValue(1.0f)) { - // fma/mad(a, b, c) = b+c if a=1 - LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2 - << "\n"); - Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd"); - replaceCall(nval); - return true; - } - if (CF1 && CF1->isExactlyValue(1.0f)) { - // fma/mad(a, b, c) = a+c if b=1 - LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2 - << "\n"); - Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd"); - replaceCall(nval); - return true; - } - if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) { - if (CF->isZero()) { - // fma/mad(a, b, c) = a*b if c=0 - LLVM_DEBUG(errs() << "AMDIC: " << *CI 
<< " ---> " << *opr0 << " * " - << *opr1 << "\n"); - Value *nval = B.CreateFMul(opr0, opr1, "fmamul"); - replaceCall(nval); - return true; - } - } - - return false; -} - // Get a scalar native builtin single argument FP function FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, const FuncInfo &FInfo) { Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -471,7 +471,7 @@ } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x -; GCN: store float %y, ptr addrspace(1) %a +; GCN: %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) define amdgpu_kernel void @test_fma_0x(ptr addrspace(1) nocapture %a, float %y) { entry: %tmp = load float, ptr addrspace(1) %a, align 4 @@ -483,7 +483,7 @@ declare float @_Z3fmafff(float, float, float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0 -; GCN: store float %y, ptr addrspace(1) %a +; GCN: %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) define amdgpu_kernel void @test_fma_x0(ptr addrspace(1) nocapture %a, float %y) { entry: %tmp = load float, ptr addrspace(1) %a, align 4 @@ -493,7 +493,7 @@ } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x -; GCN: store float %y, ptr addrspace(1) %a +; GCN: %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) define amdgpu_kernel void @test_mad_0x(ptr addrspace(1) nocapture %a, float %y) { entry: %tmp = load float, ptr addrspace(1) %a, align 4 @@ -505,7 +505,7 @@ declare float @_Z3madfff(float, float, float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0 -; GCN: store float %y, ptr addrspace(1) %a +; GCN: %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) define amdgpu_kernel void @test_mad_x0(ptr addrspace(1) nocapture %a, float %y) { entry: %tmp = load float, ptr 
addrspace(1) %a, align 4 @@ -515,7 +515,7 @@ } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y -; GCN: %fmaadd = fadd fast float %tmp, %y +; GCN: %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) define amdgpu_kernel void @test_fma_x1y(ptr addrspace(1) nocapture %a, float %y) { entry: %tmp = load float, ptr addrspace(1) %a, align 4 @@ -525,7 +525,7 @@ } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy -; GCN: %fmaadd = fadd fast float %tmp, %y +; GCN: %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) define amdgpu_kernel void @test_fma_1xy(ptr addrspace(1) nocapture %a, float %y) { entry: %tmp = load float, ptr addrspace(1) %a, align 4 @@ -535,7 +535,7 @@ } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0 -; GCN: %fmamul = fmul fast float %tmp1, %tmp +; GCN: %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) define amdgpu_kernel void @test_fma_xy0(ptr addrspace(1) nocapture %a) { entry: %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1