diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2135,6 +2135,17 @@
              LLVMMatchType<0>], // value for the inactive lanes to take
             [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
+// Similar to int_amdgcn_set_inactive, but the value for the inactive lanes must
+// be a VGPR function argument.
+// Can only be used in functions with the `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
+// flow.
+def int_amdgcn_set_inactive_chain_arg :
+  Intrinsic<[llvm_anyint_ty],
+            [LLVMMatchType<0>, // value to be copied
+             LLVMMatchType<0>], // value for the inactive lanes to take
+            [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
 // Return if the given flat pointer points to a local memory address.
 def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">,
   DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5987,6 +5987,30 @@
           "VGPR arguments must not have the `inreg` attribute", &Call);
     break;
   }
+  case Intrinsic::amdgcn_set_inactive_chain_arg: {
+    auto CallerCC = Call.getCaller()->getCallingConv();
+    switch (CallerCC) {
+    case CallingConv::AMDGPU_CS_Chain:
+    case CallingConv::AMDGPU_CS_ChainPreserve:
+      break;
+    default:
+      CheckFailed("Intrinsic can only be used from functions with the "
+                  "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+                  "calling conventions",
+                  &Call);
+      break;
+    }
+
+    unsigned InactiveIdx = 1;
+    Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
+          "Value for inactive lanes must not have the `inreg` attribute",
+          &Call);
+    // NOTE(review): the template arguments below were garbled to `isa(`/`cast(`
+    // in the pasted patch; restored to isa<Argument>/cast<Argument>, which is
+    // what the "function argument" checks and Argument::hasInRegAttr() require.
+    Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
+          "Value for inactive lanes must be a function argument", &Call);
+    Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
+          "Value for inactive lanes must be a VGPR function argument", &Call);
+    break;
+  }
   case Intrinsic::experimental_convergence_entry:
     LLVM_FALLTHROUGH;
   case Intrinsic::experimental_convergence_anchor:
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
--- a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
@@ -1,6 +1,7 @@
 ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
 
 declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
+declare i32 @llvm.amdgcn.set.inactive.chain.arg(i32, i32) convergent willreturn nofree nocallback readnone
 
 define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
   ; CHECK: immarg operand has non-immediate parameter
@@ -32,6 +33,10 @@
 }
 
 define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -39,6 +44,10 @@
 }
 
 define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -46,6 +55,10 @@
 }
 
 define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -53,8 +66,55 @@
 }
 
 define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
   unreachable
 }
+
+define amdgpu_cs void @bad_caller_amdgpu_cs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
+  ; Unlike llvm.amdgcn.set.inactive.chain.arg, llvm.amdgcn.cs.chain may be called from amdgpu_cs functions.
+
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_sgpr(ptr addrspace(1) %out, i32 %active, i32 inreg %inactive) {
+  ; CHECK: Value for inactive lanes must be a VGPR function argument
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_const(ptr addrspace(1) %out, i32 %active) {
+  ; CHECK: Value for inactive lanes must be a function argument
+  ; CHECK-NEXT: llvm.amdgcn.set.inactive.chain.arg
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 29) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_computed(ptr addrspace(1) %out, i32 %active) {
+  ; CHECK: Value for inactive lanes must be a function argument
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %inactive = add i32 %active, 127
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_inreg(ptr addrspace(1) %out, i32 %active, i32 %inactive) {
+  ; CHECK: Value for inactive lanes must not have the `inreg` attribute
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 inreg %inactive) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}