Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -122,7 +122,7 @@ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden); -static cl::opt<bool, true> EnableAMDGPUFunctionCalls( +static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt( "amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), @@ -330,7 +330,7 @@ } bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; -bool AMDGPUTargetMachine::EnableFunctionCalls = false; +bool AMDGPUTargetMachine::EnableFunctionCalls = true; AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; @@ -361,11 +361,11 @@ bool EnableOpt = getOptLevel() > CodeGenOpt::None; bool Internalize = InternalizeSymbols; - bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls; + bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls; bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt; bool LibCallSimplify = EnableLibCallSimplify && EnableOpt; - if (EnableAMDGPUFunctionCalls) { + if (EnableFunctionCalls) { delete Builder.Inliner; Builder.Inliner = createAMDGPUFunctionInliningPass(); } Index: test/CodeGen/AMDGPU/inline-calls.ll =================================================================== --- test/CodeGen/AMDGPU/inline-calls.ll +++ test/CodeGen/AMDGPU/inline-calls.ll @@ -1,16 +1,16 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALL %s +; RUN: llc -march=r600 -mcpu=redwood 
-verify-machineinstrs < %s | FileCheck -check-prefixes=R600,ALL %s -; CHECK-NOT: {{^}}func: +; ALL-NOT: {{^}}func: define internal fastcc i32 @func(i32 %a) { entry: %tmp0 = add i32 %a, 1 ret i32 %tmp0 } -; CHECK: {{^}}kernel: -; CHECK-NOT: call +; ALL: {{^}}kernel: +; GCN-NOT: s_swappc_b64 define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) { entry: %tmp0 = call i32 @func(i32 1) @@ -18,19 +18,19 @@ ret void } -; CHECK: {{^}}kernel2: -; CHECK-NOT: call +; ALL: {{^}}kernel2: +; GCN-NOT: s_swappc_b64 define amdgpu_kernel void @kernel2(i32 addrspace(1)* %out) { entry: call void @kernel(i32 addrspace(1)* %out) ret void } -; CHECK-NOT: func_alias +; ALL-NOT: func_alias @func_alias = alias i32 (i32), i32 (i32)* @func -; CHECK: {{^}}kernel3: -; CHECK-NOT: call +; ALL: {{^}}kernel3: +; GCN-NOT: s_swappc_b64 define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) { entry: %tmp0 = call i32 @func_alias(i32 1) @@ -38,11 +38,11 @@ ret void } -; CHECK-NOT: kernel_alias +; ALL-NOT: kernel_alias @kernel_alias = alias void (i32 addrspace(1)*), void (i32 addrspace(1)*)* @kernel -; CHECK: {{^}}kernel4: -; CHECK-NOT: call +; ALL: {{^}}kernel4: +; GCN-NOT: s_swappc_b64 define amdgpu_kernel void @kernel4(i32 addrspace(1)* %out) { entry: call void @kernel_alias(i32 addrspace(1)* %out)