Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1850,6 +1850,28 @@
   [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>]
 >;
 
+// A uniform tail call to a function with the `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling convention. It will populate the SGPRs
+// starting at s0 and the VGPRs starting at v8, set EXEC and perform a jump to
+// the given function.
+// Can only be used in functions with the `amdgpu_cs`, `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
+// flow.
+def int_amdgcn_cs_chain:
+  Intrinsic<[],
+            [llvm_ptr_ty, // The function to jump to.
+             llvm_anyint_ty, // Value to put in EXEC (should be i32 or i64).
+             llvm_any_ty, // Arguments that will be copied into SGPRs (s0+).
+                          // Must be uniform.
+             llvm_any_ty, // Arguments that will be copied into VGPRs (v8+).
+                          // Need not be uniform.
+             llvm_i32_ty, // Flags.
+             llvm_vararg_ty // Additional arguments. Only present if Flags is
+                            // non-zero.
+            ],
+            [IntrNoReturn, ImmArg<ArgIndex<4>>]>;
+
+
 //===----------------------------------------------------------------------===//
 // CI+ Intrinsics
 //===----------------------------------------------------------------------===//
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -86,6 +86,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/LLVMContext.h"
@@ -5911,6 +5912,22 @@
                 &Call);
     break;
   }
+  case Intrinsic::amdgcn_cs_chain: {
+    auto CallerCC = Call.getCaller()->getCallingConv();
+    switch (CallerCC) {
+    case CallingConv::AMDGPU_CS:
+    case CallingConv::AMDGPU_CS_Chain:
+    case CallingConv::AMDGPU_CS_ChainPreserve:
+      break;
+    default:
+      CheckFailed("Intrinsic can only be used from functions with the "
+                  "amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+                  "calling conventions",
+                  &Call);
+      break;
+    }
+    break;
+  }
   };
 
 // Verify that there aren't any unmediated control transfers between funclets.
Index: llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
===================================================================
--- /dev/null
+++ llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
@@ -0,0 +1,46 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
+
+define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %flags
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags)
+  ret void
+}
+
+define amdgpu_cs_chain void @bad_exec(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
+  ; CHECK: Intrinsic called with incompatible signature
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, <4 x i32>, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, <4 x i32> %sgpr, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags)
+  ret void
+}
+
+define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
+
+define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}
+
+define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.cs.chain
+  call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
+  ret void
+}