diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -170,6 +170,8 @@
   kw_amdgpu_gs,
   kw_amdgpu_ps,
   kw_amdgpu_cs,
+  kw_amdgpu_cs_chain,
+  kw_amdgpu_cs_chain_preserve,
   kw_amdgpu_kernel,
   kw_amdgpu_gfx,
   kw_tailcc,
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -237,6 +237,14 @@
   /// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
   AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,

+  /// Used on AMDGPUs to give the middle-end more control over argument
+  /// placement.
+  AMDGPU_CS_Chain = 104,
+
+  /// Used on AMDGPUs to give the middle-end more control over argument
+  /// placement.
+  AMDGPU_CS_ChainPreserve = 105,
+
   /// The highest possible ID. Must be some 2^k - 1.
   MaxID = 1023
 };
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1850,6 +1850,28 @@
   [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>]
 >;

+// A uniform tail call to a function with the `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling convention. It will populate the SGPRs
+// starting at s0 and the VGPRs starting at v8, set EXEC and perform a jump to
+// the given function.
+// Can only be used in functions with the `amdgpu_cs`, `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
+// flow.
+def int_amdgcn_cs_chain:
+  Intrinsic<[],
+            [llvm_ptr_ty,    // The function to jump to.
+             llvm_anyint_ty, // Value to put in EXEC (should be i32 or i64).
+             llvm_any_ty,    // Arguments that will be copied into SGPRs (s0+).
+                             // Must be uniform.
+             llvm_any_ty,    // Arguments that will be copied into VGPRs (v8+).
+                             // Need not be uniform.
+             llvm_i32_ty,    // Flags.
+             llvm_vararg_ty  // Additional arguments. Only present if Flags is
+                             // non-zero.
+            ],
+            [IntrNoReturn, ImmArg<ArgIndex<4>>]>;
+
 //===----------------------------------------------------------------------===//
 // CI+ Intrinsics
 //===----------------------------------------------------------------------===//
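For orientation, here is a minimal sketch of how the new intrinsic could be invoked from an amdgpu_cs entry point. The callee @chain_callee is hypothetical and the argument types simply mirror the verifier tests below; i32 -1 selects all lanes of a wave32 EXEC mask, and the zero Flags value means no trailing varargs are passed:

declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
declare amdgpu_cs_chain void @chain_callee()

define amdgpu_cs void @entry(<4 x i32> inreg %sgpr_args, { ptr, <3 x i32> } %vgpr_args) {
  ; Uniform jump to @chain_callee: %sgpr_args land in s0+, %vgpr_args in v8+.
  call void (ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...)
      @llvm.amdgcn.cs.chain(ptr @chain_callee, i32 -1, <4 x i32> %sgpr_args,
                            { ptr, <3 x i32> } %vgpr_args, i32 0)
  ; The intrinsic is IntrNoReturn, so the block ends in unreachable, not ret.
  unreachable
}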
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -627,6 +627,8 @@
   KEYWORD(amdgpu_gs);
   KEYWORD(amdgpu_ps);
   KEYWORD(amdgpu_cs);
+  KEYWORD(amdgpu_cs_chain);
+  KEYWORD(amdgpu_cs_chain_preserve);
   KEYWORD(amdgpu_kernel);
   KEYWORD(amdgpu_gfx);
   KEYWORD(tailcc);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2027,6 +2027,8 @@
 ///   ::= 'amdgpu_gs'
 ///   ::= 'amdgpu_ps'
 ///   ::= 'amdgpu_cs'
+///   ::= 'amdgpu_cs_chain'
+///   ::= 'amdgpu_cs_chain_preserve'
 ///   ::= 'amdgpu_kernel'
 ///   ::= 'tailcc'
 ///   ::= 'cc' UINT
@@ -2089,6 +2091,12 @@
   case lltok::kw_amdgpu_gs:      CC = CallingConv::AMDGPU_GS; break;
   case lltok::kw_amdgpu_ps:      CC = CallingConv::AMDGPU_PS; break;
   case lltok::kw_amdgpu_cs:      CC = CallingConv::AMDGPU_CS; break;
+  case lltok::kw_amdgpu_cs_chain:
+    CC = CallingConv::AMDGPU_CS_Chain;
+    break;
+  case lltok::kw_amdgpu_cs_chain_preserve:
+    CC = CallingConv::AMDGPU_CS_ChainPreserve;
+    break;
   case lltok::kw_amdgpu_kernel:  CC = CallingConv::AMDGPU_KERNEL; break;
   case lltok::kw_tailcc:         CC = CallingConv::Tail; break;
   case lltok::kw_cc: {
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -342,6 +342,12 @@
   case CallingConv::AMDGPU_GS:     Out << "amdgpu_gs"; break;
   case CallingConv::AMDGPU_PS:     Out << "amdgpu_ps"; break;
   case CallingConv::AMDGPU_CS:     Out << "amdgpu_cs"; break;
+  case CallingConv::AMDGPU_CS_Chain:
+    Out << "amdgpu_cs_chain";
+    break;
+  case CallingConv::AMDGPU_CS_ChainPreserve:
+    Out << "amdgpu_cs_chain_preserve";
+    break;
   case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
   case CallingConv::AMDGPU_Gfx:    Out << "amdgpu_gfx"; break;
   }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -86,6 +86,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/LLVMContext.h"
@@ -2579,6 +2580,8 @@
   }
   case CallingConv::AMDGPU_KERNEL:
   case CallingConv::SPIR_KERNEL:
+  case CallingConv::AMDGPU_CS_Chain:
+  case CallingConv::AMDGPU_CS_ChainPreserve:
     Check(F.getReturnType()->isVoidTy(),
           "Calling convention requires void return type", &F);
     [[fallthrough]];
@@ -3285,6 +3288,18 @@
     Check(Callee->getValueType() == FTy,
           "Intrinsic called with incompatible signature", Call);

+  // Disallow calls to functions with the amdgpu_cs_chain[_preserve] calling
+  // convention.
+  if (Callee) {
+    auto CC = Callee->getCallingConv();
+    Check(CC != CallingConv::AMDGPU_CS_Chain &&
+              CC != CallingConv::AMDGPU_CS_ChainPreserve,
+          "Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions "
+          "not allowed. "
+          "Please use the @llvm.amdgcn.cs.chain intrinsic instead.",
+          Call);
+  }
+
   auto VerifyTypeAlign = [&](Type *Ty, const Twine &Message) {
     if (!Ty->isSized())
       return;
@@ -5897,6 +5912,22 @@
           &Call);
     break;
   }
+  case Intrinsic::amdgcn_cs_chain: {
+    auto CallerCC = Call.getCaller()->getCallingConv();
+    switch (CallerCC) {
+    case CallingConv::AMDGPU_CS:
+    case CallingConv::AMDGPU_CS_Chain:
+    case CallingConv::AMDGPU_CS_ChainPreserve:
+      break;
+    default:
+      CheckFailed("Intrinsic can only be used from functions with the "
+                  "amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+                  "calling conventions",
+                  &Call);
+      break;
+    }
+    break;
+  }
   };

   // Verify that there aren't any unmediated control transfers between funclets.
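As a concrete reading of the two rules added above, here is a sketch (hypothetical names, signature matching the tests below) of the pattern the verifier now rejects and its accepted replacement:

declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
declare amdgpu_cs_chain void @chain_fn(<4 x i32> inreg, { ptr, <3 x i32> })

define amdgpu_cs void @caller(<4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
  ; Rejected: "Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve
  ; functions not allowed."
  ;   call amdgpu_cs_chain void @chain_fn(<4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr)
  ; Accepted: chain to the same function via the intrinsic. This is legal here
  ; because the caller uses one of the three permitted calling conventions.
  call void (ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...)
      @llvm.amdgcn.cs.chain(ptr @chain_fn, i32 -1, <4 x i32> %sgpr,
                            { ptr, <3 x i32> } %vgpr, i32 0)
  unreachable
}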
diff --git a/llvm/test/Assembler/amdgpu-cs-chain-cc.ll b/llvm/test/Assembler/amdgpu-cs-chain-cc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Assembler/amdgpu-cs-chain-cc.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: amdgpu_cs_chain void @amdgpu_cs_chain_cc
+define amdgpu_cs_chain void @amdgpu_cs_chain_cc() {
+entry:
+  ret void
+}
+
+; CHECK: amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc
+define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc() {
+entry:
+  ret void
+}
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
@@ -0,0 +1,46 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
+
+define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %flags
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags)
+  ret void
+}
+
+define amdgpu_cs_chain void @bad_exec(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
+  ; CHECK: Intrinsic called with incompatible signature
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, <4 x i32>, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, <4 x i32> %sgpr, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags)
+  ret void
+}
+
+define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
+
+define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
+
+define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
diff --git a/llvm/test/Verifier/amdgpu-cc.ll b/llvm/test/Verifier/amdgpu-cc.ll
--- a/llvm/test/Verifier/amdgpu-cc.ll
+++ b/llvm/test/Verifier/amdgpu-cc.ll
@@ -127,3 +127,107 @@
 define amdgpu_kernel void @byref_as5_cc_amdgpu_kernel(ptr addrspace(5) byref(i32) %ptr) {
   ret void
 }
+
+; CHECK: Calling convention requires void return type
+; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_cs_chain
+define amdgpu_cs_chain i32 @nonvoid_cc_amdgpu_cs_chain() {
+  ret i32 0
+}
+
+; CHECK: Calling convention does not support varargs or perfect forwarding!
+; CHECK-NEXT: ptr @varargs_amdgpu_cs_chain
+define amdgpu_cs_chain void @varargs_amdgpu_cs_chain(...) {
+  ret void
+}
+
+; CHECK: Calling convention does not allow sret
+; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_as0
+define amdgpu_cs_chain void @sret_cc_amdgpu_cs_chain_as0(ptr sret(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows byval
+; CHECK-NEXT: ptr @byval_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @byval_cc_amdgpu_cs_chain(ptr addrspace(1) byval(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows stack byref
+; CHECK-NEXT: ptr @byref_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @byref_cc_amdgpu_cs_chain(ptr addrspace(5) byref(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows preallocated
+; CHECK-NEXT: ptr @preallocated_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @preallocated_cc_amdgpu_cs_chain(ptr preallocated(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows inalloca
+; CHECK-NEXT: ptr @inalloca_cc_amdgpu_cs_chain
+define amdgpu_cs_chain void @inalloca_cc_amdgpu_cs_chain(ptr inalloca(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention requires void return type
+; CHECK-NEXT: ptr @nonvoid_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve i32 @nonvoid_cc_amdgpu_cs_chain_preserve() {
+  ret i32 0
+}
+
+; CHECK: Calling convention does not support varargs or perfect forwarding!
+; CHECK-NEXT: ptr @varargs_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @varargs_amdgpu_cs_chain_preserve(...) {
+  ret void
+}
+
+; CHECK: Calling convention does not allow sret
+; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_preserve_as0
+define amdgpu_cs_chain_preserve void @sret_cc_amdgpu_cs_chain_preserve_as0(ptr sret(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention does not allow sret
+; CHECK-NEXT: ptr @sret_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @sret_cc_amdgpu_cs_chain_preserve(ptr addrspace(5) sret(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows byval
+; CHECK-NEXT: ptr @byval_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @byval_cc_amdgpu_cs_chain_preserve(ptr addrspace(1) byval(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows stack byref
+; CHECK-NEXT: ptr @byref_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @byref_cc_amdgpu_cs_chain_preserve(ptr addrspace(5) byref(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows preallocated
+; CHECK-NEXT: ptr @preallocated_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @preallocated_cc_amdgpu_cs_chain_preserve(ptr preallocated(i32) %ptr) {
+  ret void
+}
+
+; CHECK: Calling convention disallows inalloca
+; CHECK-NEXT: ptr @inalloca_cc_amdgpu_cs_chain_preserve
+define amdgpu_cs_chain_preserve void @inalloca_cc_amdgpu_cs_chain_preserve(ptr inalloca(i32) %ptr) {
+  ret void
+}
+
+declare amdgpu_cs_chain void @amdgpu_cs_chain_call_target()
+declare amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_call_target()
+
+define amdgpu_cs_chain void @cant_call_amdgpu_cs_chain_functions() {
+  ; CHECK: Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions not allowed. Please use the @llvm.amdgcn.cs.chain intrinsic instead.
+  ; CHECK-NEXT: call void @amdgpu_cs_chain_call_target
+  call void @amdgpu_cs_chain_call_target()
+
+  ; CHECK: Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions not allowed. Please use the @llvm.amdgcn.cs.chain intrinsic instead.
+  ; CHECK-NEXT: call void @amdgpu_cs_chain_preserve_call_target
+  call void @amdgpu_cs_chain_preserve_call_target()
+  ret void
+}