Index: llvm/lib/Target/AMDGPU/AMDGPUInline.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -153,26 +153,6 @@ return (unsigned)Thres; } -// Check if call is just a wrapper around another call. -// In this case we only have call and ret instructions. -static bool isWrapperOnlyCall(CallBase &CB) { - Function *Callee = CB.getCalledFunction(); - if (!Callee || Callee->size() != 1) - return false; - const BasicBlock &BB = Callee->getEntryBlock(); - if (const Instruction *I = BB.getFirstNonPHI()) { - if (!isa<CallInst>(I)) { - return false; - } - if (isa<ReturnInst>(*std::next(I->getIterator()))) { - LLVM_DEBUG(dbgs() << " Wrapper only call detected: " - << Callee->getName() << '\n'); - return true; - } - } - return false; -} - InlineCost AMDGPUInliner::getInlineCost(CallBase &CB) { Function *Callee = CB.getCalledFunction(); Function *Caller = CB.getCaller(); @@ -194,9 +174,6 @@ return llvm::InlineCost::getNever(IsViable.getFailureReason()); } - if (isWrapperOnlyCall(CB)) - return llvm::InlineCost::getAlways("wrapper-only call"); - InlineParams LocalParams = Params; LocalParams.DefaultThreshold = (int)getInlineThreshold(CB); bool RemarksEnabled = false; Index: llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll @@ -40,7 +40,7 @@ ret void } -define coldcc float @sin_wrapper(float %x) { +define float @sin_wrapper(float %x) { bb: %call = tail call float @_Z3sinf(float %x) ret float %call @@ -83,7 +83,7 @@ %and = and i32 %tid, %n %arrayidx11 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %and %tmp12 = load float, float addrspace(5)* %arrayidx11, align 4 - %c2 = call coldcc float @sin_wrapper(float %tmp12) + %c2 = call float @sin_wrapper(float %tmp12) store float %c2, float addrspace(5)* %arrayidx7, 
align 4 %xor = xor i32 %tid, %n %arrayidx16 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %xor