Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -773,9 +773,19 @@
         } else {
           // We force CodeGen to run in SCC order, so the callee's register
           // usage etc. should be the cumulative usage of all callees.
+
           auto I = CallGraphResourceInfo.find(Callee);
-          assert(I != CallGraphResourceInfo.end() &&
-                 "callee should have been handled before caller");
+          if (I == CallGraphResourceInfo.end()) {
+            // Avoid crashing on undefined behavior with an illegal call to a
+            // kernel. If a callsite's calling convention doesn't match the
+            // function's, it's undefined behavior. If it does match, call
+            // lowering already rejected the call as an unsupported convention.
+            // FIXME: The verifier shouldn't allow this.
+            if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+              report_fatal_error("invalid call to entry function");
+
+            llvm_unreachable("callee should have been handled before caller");
+          }
 
           MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
           MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -848,9 +848,6 @@
 CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                   bool IsVarArg) {
   switch (CC) {
-  case CallingConv::AMDGPU_KERNEL:
-  case CallingConv::SPIR_KERNEL:
-    llvm_unreachable("kernels should not be handled here");
   case CallingConv::AMDGPU_VS:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
@@ -863,8 +860,10 @@
   case CallingConv::Fast:
   case CallingConv::Cold:
     return CC_AMDGPU_Func;
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
   default:
-    report_fatal_error("Unsupported calling convention.");
+    report_fatal_error("Unsupported calling convention for call");
   }
 }
 
Index: test/CodeGen/AMDGPU/call-to-kernel-undefined.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/call-to-kernel-undefined.ll
@@ -0,0 +1,20 @@
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+
+; FIXME: It should be invalid IR to have a call to a kernel. This is
+; currently relied on, but such calls should be eliminated before codegen.
+define amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) #0 {
+entry:
+  store volatile i32 0, i32 addrspace(1)* %out
+  ret void
+}
+
+; Make sure there's a clean error rather than a crash when the callsite
+; calling convention doesn't match the callee's.
+; CHECK: LLVM ERROR: invalid call to entry function
+define amdgpu_kernel void @caller_kernel(i32 addrspace(1)* %out) #0 {
+entry:
+  call void @callee_kernel(i32 addrspace(1)* %out)
+  ret void
+}
+
+attributes #0 = { nounwind noinline }
Index: test/CodeGen/AMDGPU/call-to-kernel.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/call-to-kernel.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+
+; FIXME: It should be invalid IR to have a call to a kernel. This is
+; currently relied on, but such calls should be eliminated before codegen.
+define amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out) #0 {
+entry:
+  store volatile i32 0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported calling convention for call
+define amdgpu_kernel void @caller_kernel(i32 addrspace(1)* %out) #0 {
+entry:
+  call amdgpu_kernel void @callee_kernel(i32 addrspace(1)* %out)
+  ret void
+}
+
+attributes #0 = { nounwind noinline }
Index: test/CodeGen/AMDGPU/inline-calls.ll
===================================================================
--- test/CodeGen/AMDGPU/inline-calls.ll
+++ test/CodeGen/AMDGPU/inline-calls.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
 
 ; CHECK-NOT: {{^}}func:
-define internal fastcc i32 @func(i32 %a) {
+define internal i32 @func(i32 %a) {
 entry:
   %tmp0 = add i32 %a, 1
   ret i32 %tmp0
@@ -18,14 +18,6 @@
   ret void
 }
 
-; CHECK: {{^}}kernel2:
-; CHECK-NOT: call
-define amdgpu_kernel void @kernel2(i32 addrspace(1)* %out) {
-entry:
-  call void @kernel(i32 addrspace(1)* %out)
-  ret void
-}
-
 ; CHECK-NOT: func_alias
 @func_alias = alias i32 (i32), i32 (i32)* @func
 
@@ -37,14 +29,3 @@
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
 }
-
-; CHECK-NOT: kernel_alias
-@kernel_alias = alias void (i32 addrspace(1)*), void (i32 addrspace(1)*)* @kernel
-
-; CHECK: {{^}}kernel4:
-; CHECK-NOT: call
-define amdgpu_kernel void @kernel4(i32 addrspace(1)* %out) {
-entry:
-  call void @kernel_alias(i32 addrspace(1)* %out)
-  ret void
-}