diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -31,7 +31,7 @@ "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", - "amdgpu-implicitarg-ptr"}; + "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"}; class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2958,12 +2958,6 @@ if (!CLI.CB) report_fatal_error("unsupported libcall legalization"); - if (!AMDGPUTargetMachine::EnableFixedFunctionABI && - !CLI.CB->getCalledFunction() && CallConv != CallingConv::AMDGPU_Gfx) { - return lowerUnhandledCall(CLI, InVals, - "unsupported indirect call to function "); - } - if (IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) { return lowerUnhandledCall(CLI, InVals, "unsupported required tail call to function "); diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -334,4 +334,4 @@ ; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" } ; HSA: attributes #18 = { nounwind } ; HSA: attributes #19 = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } -; HSA: attributes #20 = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } +; HSA: attributes #20 = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } diff --git a/llvm/test/CodeGen/AMDGPU/call-constant.ll b/llvm/test/CodeGen/AMDGPU/call-constant.ll --- a/llvm/test/CodeGen/AMDGPU/call-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/call-constant.ll @@ -4,8 +4,8 @@ ; FIXME: Emitting unnecessary flat_scratch setup ; GCN-LABEL: {{^}}test_call_undef: -; SDAG: s_mov_b32 flat_scratch_lo, s11 -; SDAG: s_add_u32 s10, s10, s15 +; SDAG: s_mov_b32 flat_scratch_lo, s13 +; SDAG: s_add_u32 s12, s12, s17 ; SDAG: s_lshr_b32 ; GCN: s_endpgm define amdgpu_kernel void @test_call_undef() #0 { @@ -26,8 +26,8 @@ } ; GCN-LABEL: {{^}}test_call_null: -; SDAG: s_mov_b32 flat_scratch_lo, s11 -; SDAG: s_add_u32 s10, s10, s15 +; SDAG: s_mov_b32 flat_scratch_lo, s13 +; SDAG: s_add_u32 s12, s12, s17 ; SDAG: s_lshr_b32 ; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -1,15 +1,48 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s + +target datalayout = "A5" + ; GCN-LABEL: define internal void @indirect() #0 { +; GFX9-LABEL: {{^}}indirect: define internal void @indirect() { ret void } ; GCN-LABEL: define amdgpu_kernel void @test_simple_indirect_call() #1 { +; GFX9-LABEL: {{^}}test_simple_indirect_call: +; GFX9: s_add_u32 flat_scratch_lo, s12, s17 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 +; GFX9-NEXT: s_mov_b32 s13, s15 +; GFX9-NEXT: s_mov_b32 s12, s14 +; GFX9-NEXT: s_load_dwordx2 s[14:15], s[4:5], 0x4 +; GFX9-NEXT: s_add_u32 s0, s0, s17 +; GFX9-NEXT: s_addc_u32 s1, s1, 0 +; GFX9-NEXT: s_mov_b32 s32, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_lshr_b32 s14, s14, 16 +; GFX9-NEXT: s_mul_i32 s14, s14, s15 +; GFX9-NEXT: v_mul_lo_u32 v3, s14, v0 +; GFX9-NEXT: s_getpc_b64 s[18:19] +; GFX9-NEXT: s_add_u32 s18, s18, indirect@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s19, s19, indirect@rel32@hi+12 +; GFX9-NEXT: s_mov_b32 s14, s16 +; GFX9-NEXT: v_mad_u32_u24 v3, v1, s15, v3 +; GFX9-NEXT: v_add_lshl_u32 v5, v3, v2, 3 +; GFX9-NEXT: v_mov_b32_e32 v3, s18 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, s19 +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: ds_write_b64 v5, v[3:4] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GFX9-NEXT: s_endpgm define amdgpu_kernel void @test_simple_indirect_call() { - %fptr = alloca void()* - store void()* @indirect, void()** %fptr - %fp = load void()*, void()** %fptr + %fptr = alloca void()*, addrspace(5) + %fptr.cast = addrspacecast void()* addrspace(5)* %fptr to void()** + store void()* @indirect, void()** %fptr.cast + %fp = load void()*, void()** %fptr.cast call void %fp() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll --- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll @@ -62,13 +62,6 @@ ret i32 %call } -; GCN: :0:0: in function test_indirect_call void (void ()*): unsupported indirect call to function -; R600: in function test_indirect_call{{.*}}: unsupported call to function -define void @test_indirect_call(void()* %fptr) { - call void %fptr() - ret void -} - ; GCN: :0:0: in function test_c_call_from_shader i32 (): unsupported calling convention for call from graphics shader of function defined_function ; R600: in function test_c_call{{.*}}: unsupported call to function defined_function define amdgpu_ps i32 @test_c_call_from_shader() { @@ -83,12 +76,3 @@ ret i32 %call } -; FIXME: Bad error message -; GCN: error: :0:0: in function test_call_absolute void (): unsupported indirect call to function -; R600: error: :0:0: in function test_call_absolute void (): unsupported call to function -define amdgpu_kernel void @test_call_absolute() #0 { - %val = call i32 inttoptr (i64 1234 to i32(i32)*) (i32 1) - %op = add i32 %val, 1 - store volatile i32 %op, i32 addrspace(1)* undef - ret void -}