AMDGPU: resolve the callee through pointer casts when annotating features.

The called value is stripped of pointer casts before being tested for a
Function, so a call made through a constant-expression bitcast of a function
(e.g. `call float bitcast (i32()* @f to float()*)()`) is still treated as a
direct call to that function. The tests below cover this for both the
annotation pass and code generation.

Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -228,7 +228,8 @@
     for (Instruction &I : BB) {
       CallSite CS(&I);
       if (CS) {
-        Function *Callee = CS.getCalledFunction();
+        const Function *Callee
+          = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
 
         // TODO: Do something with indirect calls.
         if (!Callee) {
Index: test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
===================================================================
--- test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -295,6 +295,20 @@
   ret void
 }
 
+; HSA: define i32 @use_dispatch_ptr_ret_type() #7 {
+define i32 @use_dispatch_ptr_ret_type() #1 {
+  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+  store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
+  ret i32 0
+}
+
+; HSA: define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #7 {
+define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
+  %f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)()
+  %fadd = fadd float %f, 1.0
+  ret float %fadd
+}
+
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind "target-cpu"="fiji" }
 attributes #2 = { nounwind "target-cpu"="gfx900" }
Index: test/CodeGen/AMDGPU/call-constexpr.ll
===================================================================
--- test/CodeGen/AMDGPU/call-constexpr.ll
+++ test/CodeGen/AMDGPU/call-constexpr.ll
@@ -16,31 +16,49 @@
   ret void
 }
 
-; GCN-LABEL: {{^}}use_workitem_id_x:
+; GCN-LABEL: {{^}}use_workitem_id_y:
 ; GCN: s_waitcnt
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 9, v0
 ; GCN-NEXT: s_setpc_b64
-define i32 @use_workitem_id_x(i32 %arg0) #0 {
-  %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %op = add i32 %id, %arg0
+define i32 @use_workitem_id_y() #0 {
+  %id = call i32 @llvm.amdgcn.workitem.id.y()
+  %op = add i32 %id, 9
   ret i32 %op
 }
 
-; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_x:
-; GCN: v_mov_b32_e32 v1, v0
+; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_y:
+; GCN: v_mov_b32_e32 v0, v1
 ; GCN: s_getpc_b64
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@lo+4
-; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@hi+4
-; GCN: v_mov_b32_e32 v0, 9
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_y@rel32@lo+4
+; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_y@rel32@hi+4
 ; GCN: s_swappc_b64
 ; GCN: v_add_f32_e32
-define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 {
-  %val = call float bitcast (i32(i32)* @use_workitem_id_x to float(i32)*)(i32 9)
+define amdgpu_kernel void @test_bitcast_use_workitem_id_y() #0 {
+  %val = call float bitcast (i32()* @use_workitem_id_y to float()*)()
   %op = fadd float %val, 1.0
   store volatile float %op, float addrspace(1)* undef
   ret void
 }
 
+; GCN-LABEL: {{^}}use_dispatch_ptr_ret_type:
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s6
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s7
+define i32 @use_dispatch_ptr_ret_type() #0 {
+  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+  store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
+  ret i32 0
+}
+
+; GCN-LABEL: {{^}}indirect_use_dispatch_ptr_constexpr_cast_func:
+; GCN: s_mov_b32 s4, s10
+; GCN: s_swappc_b64
+define amdgpu_kernel void @indirect_use_dispatch_ptr_constexpr_cast_func() #0 {
+  %f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)()
+  %fadd = fadd float %f, 1.0
+  store volatile float %fadd, float addrspace(1)* undef
+  ret void
+}
+
 declare i32 @extern_variadic(...)
 
 ; GCN-LABEL: {{^}}test_tail_call_bitcast_extern_variadic:
@@ -58,7 +76,8 @@
   ret i32 %call
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
 
 attributes #0 = { nounwind noinline }
 attributes #1 = { nounwind readnone speculatable }