Index: lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -69,9 +69,6 @@ return -1; return 0; } - - LLVM_READONLY - bool enableFunctionCalls() const; }; //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -275,11 +275,6 @@ AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; -bool AMDGPUTargetMachine::enableFunctionCalls() const { - return EnableAMDGPUFunctionCalls && - getTargetTriple().getArch() == Triple::amdgcn; -} - StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const { Attribute GPUAttr = F.getFnAttribute("target-cpu"); return GPUAttr.hasAttribute(Attribute::None) ? @@ -486,8 +481,10 @@ public: GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) : AMDGPUPassConfig(TM, PM) { - // It is necessary to know the register usage of the entire call graph. - setRequiresCodeGenSCCOrder(EnableAMDGPUFunctionCalls); + // It is necessary to know the register usage of the entire call graph. We + // allow calls without EnableAMDGPUFunctionCalls if they are marked + // noinline, so this is always required. + setRequiresCodeGenSCCOrder(true); } GCNTargetMachine &getGCNTargetMachine() const { @@ -556,15 +553,18 @@ addPass(createAMDGPULowerIntrinsicsPass()); - // Function calls are not supported, so make sure we inline everything. - addPass(createAMDGPUAlwaysInlinePass()); - addPass(createAlwaysInlinerLegacyPass()); - // We need to add the barrier noop pass, otherwise adding the function - // inlining pass will cause all of the PassConfigs passes to be run - // one function at a time, which means if we have a nodule with two - // functions, then we will generate code for the first function - // without ever running any passes on the second. - addPass(createBarrierNoopPass()); + if (TM.getTargetTriple().getArch() == Triple::r600 || + !EnableAMDGPUFunctionCalls) { + // Function calls are not supported, so make sure we inline everything. + addPass(createAMDGPUAlwaysInlinePass()); + addPass(createAlwaysInlinerLegacyPass()); + // We need to add the barrier noop pass, otherwise adding the function + // inlining pass will cause all of the PassConfigs passes to be run + // one function at a time, which means if we have a nodule with two + // functions, then we will generate code for the first function + // without ever running any passes on the second. + addPass(createBarrierNoopPass()); + } if (TM.getTargetTriple().getArch() == Triple::amdgcn) { // TODO: May want to move later or split into an early and late one. Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1958,11 +1958,6 @@ // The wave scratch offset register is used as the global base pointer. SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { - const AMDGPUTargetMachine &TM = - static_cast(getTargetMachine()); - if (!TM.enableFunctionCalls()) - return AMDGPUTargetLowering::LowerCall(CLI, InVals); - SelectionDAG &DAG = CLI.DAG; const SDLoc &DL = CLI.DL; SmallVector &Outs = CLI.Outs; Index: test/CodeGen/AMDGPU/basic-call-return.ll =================================================================== --- test/CodeGen/AMDGPU/basic-call-return.ll +++ test/CodeGen/AMDGPU/basic-call-return.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s define void @void_func_void() #2 { ret void Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s %struct.ByValStruct = type { [4 x i32] } Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s -; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s declare void @external_void_func_i1(i1) #0 declare void @external_void_func_i1_signext(i1 signext) #0 Index: test/CodeGen/AMDGPU/call-encoding.ll =================================================================== --- test/CodeGen/AMDGPU/call-encoding.ll +++ test/CodeGen/AMDGPU/call-encoding.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -check-prefixes=GCN,VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-function-calls -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=gfx900 -d - | FileCheck -check-prefixes=GCN,GFX9 %s -; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=hawaii -d - | FileCheck -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=gfx900 -d - | FileCheck -check-prefixes=GCN,GFX9 %s +; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=hawaii -d - | FileCheck -check-prefixes=GCN,CI %s ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 Index: test/CodeGen/AMDGPU/call-graph-register-usage.ll =================================================================== --- test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s ; Make sure to run a GPU with the SGPR allocation bug. Index: test/CodeGen/AMDGPU/call-preserved-registers.ll =================================================================== --- test/CodeGen/AMDGPU/call-preserved-registers.ll +++ test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @external_void_func_void() #0 Index: test/CodeGen/AMDGPU/call-return-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-return-types.ll +++ test/CodeGen/AMDGPU/call-return-types.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @external_void_func_void() #0 Index: test/CodeGen/AMDGPU/call.ll =================================================================== --- test/CodeGen/AMDGPU/call.ll +++ test/CodeGen/AMDGPU/call.ll @@ -1,5 +1,3 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck %s ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s ; CHECK: in function test_call_external{{.*}}: unsupported call to function external_function Index: test/CodeGen/AMDGPU/callee-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/callee-frame-setup.ll +++ test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-function-calls -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -amdgpu-function-calls -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}callee_no_stack: ; GCN: ; BB#0: Index: test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-function-calls -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -amdgpu-function-calls -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}use_dispatch_ptr: ; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0 Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-function-calls -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt Index: test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- test/CodeGen/AMDGPU/frame-index-elimination.ll +++ test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; Test that non-entry function frame indices are expanded properly to ; give an index relative to the scratch wave offset register @@ -144,6 +144,7 @@ store volatile i32 %mul, i32 addrspace(3)* undef ret void } + ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live: ; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s5, s4 ; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6 Index: test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty: ; GCN: enable_sgpr_kernarg_segment_ptr = 1 Index: test/CodeGen/AMDGPU/mem-builtins.ll =================================================================== --- test/CodeGen/AMDGPU/mem-builtins.ll +++ test/CodeGen/AMDGPU/mem-builtins.ll @@ -1,4 +1,5 @@ -; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -march=r600 < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s declare i32 @memcmp(i8 addrspace(1)* readonly nocapture, i8 addrspace(1)* readonly nocapture, i64) #0 declare i8 addrspace(1)* @memchr(i8 addrspace(1)* readonly nocapture, i32, i64) #1 @@ -9,6 +10,9 @@ ; ERROR: error: :0:0: in function test_memcmp void (i8 addrspace(1)*, i8 addrspace(1)*, i32*): unsupported call to function memcmp + +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+4 define amdgpu_kernel void @test_memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i32* nocapture %p) #0 { entry: %cmp = tail call i32 @memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i64 2) @@ -17,6 +21,9 @@ } ; ERROR: error: :0:0: in function test_memchr void (i8 addrspace(1)*, i32, i64): unsupported call to function memchr + +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+4 define amdgpu_kernel void @test_memchr(i8 addrspace(1)* %src, i32 %char, i64 %len) #0 { %res = call i8 addrspace(1)* @memchr(i8 addrspace(1)* %src, i32 %char, i64 %len) store volatile i8 addrspace(1)* %res, i8 addrspace(1)* addrspace(1)* undef @@ -24,6 +31,9 @@ } ; ERROR: error: :0:0: in function test_strcpy void (i8*, i8*): unsupported call to function strcpy + +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+4 define amdgpu_kernel void @test_strcpy(i8* %dst, i8* %src) #0 { %res = call i8* @strcpy(i8* %dst, i8* %src) store volatile i8* %res, i8* addrspace(1)* undef @@ -31,6 +41,9 @@ } ; ERROR: error: :0:0: in function test_strcmp void (i8*, i8*): unsupported call to function strcmp + +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+4 define amdgpu_kernel void @test_strcmp(i8* %src0, i8* %src1) #0 { %res = call i32 @strcmp(i8* %src0, i8* %src1) store volatile i32 %res, i32 addrspace(1)* undef @@ -38,6 +51,9 @@ } ; ERROR: error: :0:0: in function test_strlen void (i8*): unsupported call to function strlen + +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+4 define amdgpu_kernel void @test_strlen(i8* %src) #0 { %res = call i32 @strlen(i8* %src) store volatile i32 %res, i32 addrspace(1)* undef @@ -45,6 +61,9 @@ } ; ERROR: error: :0:0: in function test_strnlen void (i8*, i32): unsupported call to function strnlen + +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+4 define amdgpu_kernel void @test_strnlen(i8* %src, i32 %size) #0 { %res = call i32 @strnlen(i8* %src, i32 %size) store volatile i32 %res, i32 addrspace(1)* undef Index: test/CodeGen/AMDGPU/nested-calls.ll =================================================================== --- test/CodeGen/AMDGPU/nested-calls.ll +++ test/CodeGen/AMDGPU/nested-calls.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-sroa=0 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-sroa=0 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VI %s ; Test calls when called by other callable functions rather than ; kernels. Index: test/CodeGen/AMDGPU/private-memory-broken.ll =================================================================== --- test/CodeGen/AMDGPU/private-memory-broken.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: not llc -verify-machineinstrs -march=amdgcn %s -o /dev/null 2>&1 | FileCheck %s -; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=tonga %s -o /dev/null 2>&1 | FileCheck %s - -; Make sure promote alloca pass doesn't crash - -; CHECK: unsupported call - -declare i32 @foo(i32*) nounwind - -define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind { -entry: - %tmp = alloca [2 x i32] - %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 - %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 - store i32 0, i32* %tmp1 - store i32 1, i32* %tmp2 - %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in - %val = call i32 @foo(i32* %tmp3) nounwind - store i32 %val, i32 addrspace(1)* %out - ret void -} Index: test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll +++ test/CodeGen/AMDGPU/promote-alloca-bitcast-function.ll @@ -1,19 +1,21 @@ ; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck %s +; CHECK: LLVM ERROR: indirect calls not handled + ; Make sure that AMDGPUPromoteAlloca doesn't crash if the called ; function is a constantexpr cast of a function. declare void @foo(float*) #0 declare void @foo.varargs(...) #0 -; CHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo +; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo define amdgpu_kernel void @crash_call_constexpr_cast() #0 { %alloca = alloca i32 call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0 ret void } -; CHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs +; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 { %alloca = alloca i32 call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0 Index: test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll +++ test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s +; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=ASM %s ; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { ; IR: alloca [5 x i32] @@ -74,6 +74,26 @@ ret void } +declare i32 @foo(i32*) #0 + +; ASM-LABEL: {{^}}call_private: +; ASM: buffer_store_dword +; ASM: buffer_store_dword +; ASM: s_swappc_b64 +; ASM: ScratchSize: 16396 +define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in + %val = call i32 @foo(i32* %tmp3) + store i32 %val, i32 addrspace(1)* %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }