Index: lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -21,6 +21,14 @@
 
 namespace {
 
+// Testing knob: when enabled, this pass adds noinline where it would
+// otherwise add alwaysinline. Hidden, and off by default.
+static cl::opt<bool> StressCalls(
+  "amdgpu-stress-function-calls",
+  cl::Hidden,
+  cl::desc("Force all functions to be noinline"),
+  cl::init(false));
+
 class AMDGPUAlwaysInline : public ModulePass {
   bool GlobalOpt;
 
@@ -57,9 +65,15 @@
     }
   }
 
+  // NewAttr is the attribute this pass applies. A function that already
+  // carries the opposite attribute (IncompatAttr) is skipped.
+  auto NewAttr = StressCalls ? Attribute::NoInline : Attribute::AlwaysInline;
+  auto IncompatAttr
+    = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
+
   for (Function &F : M) {
     if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
-        !F.hasFnAttribute(Attribute::NoInline))
+        !F.hasFnAttribute(IncompatAttr))
       FuncsToClone.push_back(&F);
   }
 
@@ -71,8 +85,8 @@
   }
 
   for (Function &F : M) {
-    if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
-      F.addFnAttr(Attribute::AlwaysInline);
+    if (F.hasLocalLinkage() && !F.hasFnAttribute(IncompatAttr)) {
+      F.addFnAttr(NewAttr);
     }
   }
   return false;
Index: test/CodeGen/AMDGPU/stress-calls.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/stress-calls.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck %s
+
+; With -amdgpu-stress-function-calls, a function already marked alwaysinline
+; keeps that attribute; the other internal functions end up noinline.
+
+; CHECK: define internal fastcc i32 @alwaysinline_func(i32 %a) #0 {
+define internal fastcc i32 @alwaysinline_func(i32 %a) alwaysinline {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: define internal fastcc i32 @noinline_func(i32 %a) #1 {
+define internal fastcc i32 @noinline_func(i32 %a) noinline {
+entry:
+  %tmp0 = add i32 %a, 2
+  ret i32 %tmp0
+}
+
+; CHECK: define internal fastcc i32 @unmarked_func(i32 %a) #1 {
+define internal fastcc i32 @unmarked_func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 3
+  ret i32 %tmp0
+}
+
+define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call fastcc i32 @alwaysinline_func(i32 1)
+  store volatile i32 %tmp0, i32 addrspace(1)* %out
+  %tmp1 = call fastcc i32 @noinline_func(i32 1)
+  store volatile i32 %tmp1, i32 addrspace(1)* %out
+  %tmp2 = call fastcc i32 @unmarked_func(i32 1)
+  store volatile i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: attributes #0 = { alwaysinline }
+; CHECK: attributes #1 = { noinline }