Index: include/llvm/IR/CallingConv.h
===================================================================
--- include/llvm/IR/CallingConv.h
+++ include/llvm/IR/CallingConv.h
@@ -190,6 +190,9 @@
     /// Calling convention used for Mesa compute shaders.
     AMDGPU_CS = 90,
 
+    /// Calling convention for AMDGPU code object kernels.
+    AMDGPU_KERNEL = 91,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
Index: lib/AsmParser/LLLexer.cpp
===================================================================
--- lib/AsmParser/LLLexer.cpp
+++ lib/AsmParser/LLLexer.cpp
@@ -604,6 +604,7 @@
   KEYWORD(amdgpu_gs);
   KEYWORD(amdgpu_ps);
   KEYWORD(amdgpu_cs);
+  KEYWORD(amdgpu_kernel);
 
   KEYWORD(cc);
   KEYWORD(c);
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -1636,6 +1636,7 @@
 ///   ::= 'amdgpu_gs'
 ///   ::= 'amdgpu_ps'
 ///   ::= 'amdgpu_cs'
+///   ::= 'amdgpu_kernel'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -1675,6 +1676,7 @@
   case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break;
   case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
   case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
+  case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
   case lltok::kw_cc: {
       Lex.Lex();
       return ParseUInt32(CC);
Index: lib/AsmParser/LLToken.h
===================================================================
--- lib/AsmParser/LLToken.h
+++ lib/AsmParser/LLToken.h
@@ -111,6 +111,7 @@
   kw_amdgpu_gs,
   kw_amdgpu_ps,
   kw_amdgpu_cs,
+  kw_amdgpu_kernel,
 
   // Attributes:
   kw_attributes,
Index: lib/IR/AsmWriter.cpp
===================================================================
--- lib/IR/AsmWriter.cpp
+++ lib/IR/AsmWriter.cpp
@@ -332,6 +332,7 @@
   case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break;
   case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break;
   case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
+  case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
   }
 }
 
Index: lib/Target/AMDGPU/AMDGPUMachineFunction.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -29,10 +29,9 @@
   /// Start of implicit kernel args
   unsigned ABIArgOffset;
 
-  bool isKernel() const {
-    // FIXME: Assume everything is a kernel until function calls are supported.
-    return true;
-  }
+  /// Returns the IsKernel flag, which the constructor sets when the function's
+  /// calling convention is AMDGPU_KERNEL or SPIR_KERNEL.
+  bool isKernel() const;
 
   unsigned ScratchSize;
   bool IsKernel;
Index: lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -13,5 +13,10 @@
   LDSSize(0),
   ABIArgOffset(0),
   ScratchSize(0),
-  IsKernel(true) {
+  IsKernel(MF.getFunction()->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL ||
+           MF.getFunction()->getCallingConv() == llvm::CallingConv::SPIR_KERNEL) {
+}
+
+bool AMDGPUMachineFunction::isKernel() const {
+  return IsKernel;
 }
Index: test/Bitcode/compatibility.ll
===================================================================
--- test/Bitcode/compatibility.ll
+++ test/Bitcode/compatibility.ll
@@ -428,6 +428,46 @@
 ; CHECK: declare x86_vectorcallcc void @f.cc80()
 declare x86_vectorcallcc void @f.x86_vectorcallcc()
 ; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc()
+declare cc81 void @f.cc81()
+; CHECK: declare hhvmcc void @f.cc81()
+declare hhvmcc void @f.hhvmcc()
+; CHECK: declare hhvmcc void @f.hhvmcc()
+declare cc82 void @f.cc82()
+; CHECK: declare hhvm_ccc void @f.cc82()
+declare hhvm_ccc void @f.hhvm_ccc()
+; CHECK: declare hhvm_ccc void @f.hhvm_ccc()
+declare cc83 void @f.cc83()
+; CHECK: declare x86_intrcc void @f.cc83()
+declare x86_intrcc void @f.x86_intrcc()
+; CHECK: declare x86_intrcc void @f.x86_intrcc()
+declare cc84 void @f.cc84()
+; CHECK: declare avr_intrcc void @f.cc84()
+declare avr_intrcc void @f.avr_intrcc()
+; CHECK: declare avr_intrcc void @f.avr_intrcc()
+declare cc85 void @f.cc85()
+; CHECK: declare avr_signalcc void @f.cc85()
+declare avr_signalcc void @f.avr_signalcc()
+; CHECK: declare avr_signalcc void @f.avr_signalcc()
+declare cc87 void @f.cc87()
+; CHECK: declare amdgpu_vs void @f.cc87()
+declare amdgpu_vs void @f.amdgpu_vs()
+; CHECK: declare amdgpu_vs void @f.amdgpu_vs()
+declare cc88 void @f.cc88()
+; CHECK: declare amdgpu_gs void @f.cc88()
+declare amdgpu_gs void @f.amdgpu_gs()
+; CHECK: declare amdgpu_gs void @f.amdgpu_gs()
+declare cc89 void @f.cc89()
+; CHECK: declare amdgpu_ps void @f.cc89()
+declare amdgpu_ps void @f.amdgpu_ps()
+; CHECK: declare amdgpu_ps void @f.amdgpu_ps()
+declare cc90 void @f.cc90()
+; CHECK: declare amdgpu_cs void @f.cc90()
+declare amdgpu_cs void @f.amdgpu_cs()
+; CHECK: declare amdgpu_cs void @f.amdgpu_cs()
+declare cc91 void @f.cc91()
+; CHECK: declare amdgpu_kernel void @f.cc91()
+declare amdgpu_kernel void @f.amdgpu_kernel()
+; CHECK: declare amdgpu_kernel void @f.amdgpu_kernel()
 declare cc1023 void @f.cc1023()
 ; CHECK: declare cc1023 void @f.cc1023()
 
Index: test/CodeGen/AMDGPU/hsa-func.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-func.ll
+++ test/CodeGen/AMDGPU/hsa-func.ll
@@ -29,7 +29,7 @@
 ; ELF: Symbol {
 ; ELF: Name: simple
 ; ELF: Size: 288
-; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
+; ELF: Type: Function (0x2)
 ; ELF: }
 
 ; HSA: .hsa_code_object_version 1,0
@@ -38,7 +38,7 @@
 
 ; HSA: .hsatext
 
-; HSA: .amdgpu_hsa_kernel simple
+; HSA-NOT: .amdgpu_hsa_kernel simple
 ; HSA: {{^}}simple:
 ; HSA: .amd_kernel_code_t
 ; HSA: enable_sgpr_private_segment_buffer = 1
Index: test/CodeGen/AMDGPU/hsa.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa.ll
+++ test/CodeGen/AMDGPU/hsa.ll
@@ -56,7 +56,7 @@
 ; HSA: .Lfunc_end0:
 ; HSA: .size simple, .Lfunc_end0-simple
 
-define void @simple(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @simple(i32 addrspace(1)* %out) {
 entry:
   store i32 0, i32 addrspace(1)* %out
   ret void