Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -218,7 +218,7 @@
                                 LocalParams, TTI, GetAssumptionCache, None, PSI,
                                 RemarksEnabled ? &ORE : nullptr);
 
-  if (IC && !IC.isAlways()) {
+  if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
     // Single BB does not increase total BB amount, thus subtract 1
     size_t Size = Caller->size() + Callee->size() - 1;
     if (MaxBB && Size > MaxBB)
Index: llvm/trunk/test/CodeGen/AMDGPU/inline-maxbb.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inline-maxbb.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/inline-maxbb.ll
@@ -31,3 +31,38 @@
   store volatile i32 %res, i32 addrspace(1)* undef
   ret void
 }
+
+
+; A callee marked inlinehint is expected to be inlined under both check prefixes.
+define i32 @callee_hint(i32 %x) #0 {
+entry:
+  %cc = icmp eq i32 %x, 1
+  br i1 %cc, label %ret_res, label %mulx
+
+mulx:
+  %mul1 = mul i32 %x, %x
+  %mul2 = mul i32 %mul1, %x
+  %mul3 = mul i32 %mul1, %mul2
+  %mul4 = mul i32 %mul3, %mul2
+  %mul5 = mul i32 %mul4, %mul3
+  br label %ret_res
+
+ret_res:
+  %r = phi i32 [ %mul5, %mulx ], [ %x, %entry ]
+  ret i32 %r
+}
+
+; INL-LABEL: @caller_hint
+; NOINL-LABEL: @caller_hint
+; INL: mul i32
+; INL-NOT: call i32
+; NOINL: mul i32
+; NOINL-NOT: call i32
+
+define amdgpu_kernel void @caller_hint(i32 %x) {
+  %res = call i32 @callee_hint(i32 %x)
+  store volatile i32 %res, i32 addrspace(1)* undef
+  ret void
+}
+
+attributes #0 = { inlinehint }