diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -170,6 +170,8 @@
   kw_amdgpu_gs,
   kw_amdgpu_ps,
   kw_amdgpu_cs,
+  kw_amdgpu_cs_chain,
+  kw_amdgpu_cs_chain_preserve,
   kw_amdgpu_kernel,
   kw_amdgpu_gfx,
   kw_tailcc,
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -237,6 +237,14 @@
   /// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
   AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,

+  /// Used on AMDGPUs to give the middle-end more control over argument
+  /// placement.
+  AMDGPU_CS_Chain = 104,
+
+  /// Used on AMDGPUs to give the middle-end more control over argument
+  /// placement.
+  AMDGPU_CS_ChainPreserve = 105,
+
   /// The highest possible ID. Must be some 2^k - 1.
   MaxID = 1023
 };
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -627,6 +627,8 @@
   KEYWORD(amdgpu_gs);
   KEYWORD(amdgpu_ps);
   KEYWORD(amdgpu_cs);
+  KEYWORD(amdgpu_cs_chain);
+  KEYWORD(amdgpu_cs_chain_preserve);
   KEYWORD(amdgpu_kernel);
   KEYWORD(amdgpu_gfx);
   KEYWORD(tailcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2027,6 +2027,8 @@
 ///   ::= 'amdgpu_gs'
 ///   ::= 'amdgpu_ps'
 ///   ::= 'amdgpu_cs'
+///   ::= 'amdgpu_cs_chain'
+///   ::= 'amdgpu_cs_chain_preserve'
 ///   ::= 'amdgpu_kernel'
 ///   ::= 'tailcc'
 ///   ::= 'cc' UINT
@@ -2089,6 +2091,12 @@
   case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break;
   case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
   case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
+  case lltok::kw_amdgpu_cs_chain:
+    CC = CallingConv::AMDGPU_CS_Chain;
+    break;
+  case lltok::kw_amdgpu_cs_chain_preserve:
+    CC = CallingConv::AMDGPU_CS_ChainPreserve;
+    break;
   case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
   case lltok::kw_tailcc: CC = CallingConv::Tail; break;
   case lltok::kw_cc: {
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -342,6 +342,12 @@
   case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break;
   case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break;
   case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
+  case CallingConv::AMDGPU_CS_Chain:
+    Out << "amdgpu_cs_chain";
+    break;
+  case CallingConv::AMDGPU_CS_ChainPreserve:
+    Out << "amdgpu_cs_chain_preserve";
+    break;
   case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
   case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
   }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2579,6 +2579,8 @@
   }
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
+  case CallingConv::AMDGPU_CS_Chain:
+  case CallingConv::AMDGPU_CS_ChainPreserve:
     Check(F.getReturnType()->isVoidTy(),
           "Calling convention requires void return type", &F);
     [[fallthrough]];
@@ -3285,6 +3287,15 @@
     Check(Callee->getValueType() == FTy,
           "Intrinsic called with incompatible signature", Call);

+  // Disallow calls to functions with the amdgpu_cs_chain[_preserve] calling
+  // convention.
+  auto CC = Call.getCallingConv();
+  Check(CC != CallingConv::AMDGPU_CS_Chain &&
+            CC != CallingConv::AMDGPU_CS_ChainPreserve,
+        "Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions "
+        "not allowed. Please use the @llvm.amdgpu.cs.chain intrinsic instead.",
+        Call);
+
   auto VerifyTypeAlign = [&](Type *Ty, const Twine &Message) {
     if (!Ty->isSized())
       return;
diff --git a/llvm/test/Assembler/amdgpu-cs-chain-cc.ll b/llvm/test/Assembler/amdgpu-cs-chain-cc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Assembler/amdgpu-cs-chain-cc.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: amdgpu_cs_chain void @amdgpu_cs_chain_cc
+define amdgpu_cs_chain void @amdgpu_cs_chain_cc() {
+entry:
+  ret void
+}
+
+; CHECK: amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc
+define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc() {
+entry:
+  ret void
+}
diff --git a/llvm/test/Verifier/amdgpu-cc.ll b/llvm/test/Verifier/amdgpu-cc.ll
--- a/llvm/test/Verifier/amdgpu-cc.ll
+++ b/llvm/test/Verifier/amdgpu-cc.ll
@@ -127,3 +127,107 @@
 define amdgpu_kernel void @byref_as5_cc_amdgpu_kernel(ptr addrspace(5) byref(i32) %ptr) {
   ret void
 }
+
+; CHECK: Calling convention requires void return type
+; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_cs_chain
+define amdgpu_cs_chain i32 @nonvoid_cc_amdgpu_cs_chain() {
+  ret i32 0
+}
+
+; CHECK: Calling convention does not support varargs or perfect forwarding!
+; CHECK-NEXT: ptr @varargs_amdgpu_cs_chain
+define amdgpu_cs_chain void @varargs_amdgpu_cs_chain(...) {
+  ret void
+}
+
+; CHECK: Calling convention does not allow sret
+; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_as0
+define amdgpu_cs_chain void @sret_cc_amdgpu_cs_chain_as0(ptr sret(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows byval
+; CHECK-NEXT: ptr @byval_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @byval_cc_amdgpu_cs_chain(ptr addrspace(1) byval(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows stack byref
+; CHECK-NEXT: ptr @byref_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @byref_cc_amdgpu_cs_chain(ptr addrspace(5) byref(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows preallocated
+; CHECK-NEXT: ptr @preallocated_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @preallocated_cc_amdgpu_cs_chain(ptr preallocated(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows inalloca
+; CHECK-NEXT: ptr @inalloca_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @inalloca_cc_amdgpu_cs_chain(ptr inalloca(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention requires void return type
+; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve i32 @nonvoid_cc_amdgpu_cs_chain_preserve() {
+  ret i32 0
+}
+
+; CHECK: Calling convention does not support varargs or perfect forwarding!
+; CHECK-NEXT: ptr @varargs_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @varargs_amdgpu_cs_chain_preserve(...) {
+  ret void
+}
+
+; CHECK: Calling convention does not allow sret
+; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_preserve_as0
+define amdgpu_cs_chain_preserve void @sret_cc_amdgpu_cs_chain_preserve_as0(ptr sret(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention does not allow sret
+; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @sret_cc_amdgpu_cs_chain_preserve(ptr addrspace(5) sret(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows byval
+; CHECK-NEXT: ptr @byval_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @byval_cc_amdgpu_cs_chain_preserve(ptr addrspace(1) byval(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows stack byref
+; CHECK-NEXT: ptr @byref_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @byref_cc_amdgpu_cs_chain_preserve(ptr addrspace(5) byref(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows preallocated
+; CHECK-NEXT: ptr @preallocated_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @preallocated_cc_amdgpu_cs_chain_preserve(ptr preallocated(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows inalloca
+; CHECK-NEXT: ptr @inalloca_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @inalloca_cc_amdgpu_cs_chain_preserve(ptr inalloca(i32) %ptr) {
+  ret void
+}
+
+declare amdgpu_cs_chain void @amdgpu_cs_chain_call_target()
+declare amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_call_target()
+
+define amdgpu_cs_chain void @cant_call_amdgpu_cs_chain_functions() {
+  ; CHECK: Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions not allowed. Please use the @llvm.amdgpu.cs.chain intrinsic instead.
+  ; CHECK-NEXT: call amdgpu_cs_chain
+  call amdgpu_cs_chain void @amdgpu_cs_chain_call_target()
+
+  ; CHECK: Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions not allowed. Please use the @llvm.amdgpu.cs.chain intrinsic instead.
+  ; CHECK-NEXT: call amdgpu_cs_chain_preserve
+  call amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_call_target()
+  ret void
+}
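Note for reviewers (not part of the patch): below is a minimal sketch of how a frontend or pass might tag a function with one of the new calling conventions through the C++ API. The helper name is illustrative only; it simply exercises the CallingConv::AMDGPU_CS_Chain enumerator added above and stays within the new verifier rules (void return, no varargs, no sret/byval/stack-byref/preallocated/inalloca parameters).

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Illustrative helper: create a `void()` function and mark it with the new
// convention. AsmWriter will print it back as
// `declare amdgpu_cs_chain void @<Name>()`, matching the assembler test above.
static Function *createChainStub(Module &M, StringRef Name) {
  FunctionType *FTy =
      FunctionType::get(Type::getVoidTy(M.getContext()), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M);
  // The verifier now requires a void return type for this convention and
  // rejects varargs as well as sret, byval, stack byref, preallocated and
  // inalloca parameter attributes.
  F->setCallingConv(CallingConv::AMDGPU_CS_Chain);
  return F;
}

Also note that, per the new verifier diagnostic, emitting a direct call to such a function will be rejected; the chain intrinsic named in that diagnostic is not defined in this patch, so the sketch deliberately stops at creating and tagging the callee.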