Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -122,11 +122,11 @@ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden); -static cl::opt EnableAMDGPUFunctionCalls( +static cl::opt EnableAMDGPUFunctionCallsOpt( "amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), - cl::init(false), + cl::init(true), cl::Hidden); // Enable lib calls simplifications @@ -361,11 +361,11 @@ bool EnableOpt = getOptLevel() > CodeGenOpt::None; bool Internalize = InternalizeSymbols; - bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls; + bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls; bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt; bool LibCallSimplify = EnableLibCallSimplify && EnableOpt; - if (EnableAMDGPUFunctionCalls) { + if (EnableFunctionCalls) { delete Builder.Inliner; Builder.Inliner = createAMDGPUFunctionInliningPass(); } @@ -426,6 +426,11 @@ CodeGenOpt::Level OL, bool JIT) : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { setRequiresStructuredCFG(true); + + // Override the default since calls aren't ssupported for r600. + if (EnableFunctionCalls && + EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0) + EnableFunctionCalls = false; } const R600Subtarget *R600TargetMachine::getSubtargetImpl( Index: test/CodeGen/AMDGPU/amdgpu-inline.ll =================================================================== --- test/CodeGen/AMDGPU/amdgpu-inline.ll +++ test/CodeGen/AMDGPU/amdgpu-inline.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -amdgpu-function-calls -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -amdgpu-function-calls < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s define coldcc float @foo(float %x, float %y) { entry: Index: test/CodeGen/AMDGPU/call-graph-register-usage.ll =================================================================== --- test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -84,7 +84,7 @@ ; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr: ; GCN: is_dynamic_callstack = 0 -; GCN: ; NumVgprs: 10 +; GCN: ; NumVgprs: 33 define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 { call void @indirect_use_10_vgpr() ret void @@ -225,6 +225,6 @@ } -attributes #0 = { nounwind norecurse } +attributes #0 = { nounwind noinline norecurse } attributes #1 = { nounwind noinline norecurse } attributes #2 = { nounwind noinline } Index: test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll =================================================================== --- test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll +++ test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-always-inline %s | FileCheck -check-prefixes=CALLS-ENABLED,ALL %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck -check-prefixes=STRESS-CALLS,ALL %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline %s | FileCheck -check-prefixes=CALLS-ENABLED,ALL %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-stress-function-calls -amdgpu-always-inline %s | FileCheck -check-prefixes=STRESS-CALLS,ALL %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" Index: test/CodeGen/AMDGPU/inline-calls.ll =================================================================== --- test/CodeGen/AMDGPU/inline-calls.ll +++ test/CodeGen/AMDGPU/inline-calls.ll @@ -2,15 +2,15 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s -; CHECK-NOT: {{^}}func: +; ALL-NOT: {{^}}func: define internal i32 @func(i32 %a) { entry: %tmp0 = add i32 %a, 1 ret i32 %tmp0 } -; CHECK: {{^}}kernel: -; CHECK-NOT: call +; ALL: {{^}}kernel: +; GCN-NOT: s_swappc_b64 define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) { entry: %tmp0 = call i32 @func(i32 1) @@ -19,10 +19,11 @@ } ; CHECK-NOT: func_alias +; ALL-NOT: func_alias @func_alias = alias i32 (i32), i32 (i32)* @func -; CHECK: {{^}}kernel3: -; CHECK-NOT: call +; ALL: {{^}}kernel3: +; GCN-NOT: s_swappc_b64 define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) { entry: %tmp0 = call i32 @func_alias(i32 1)