Diff 552570

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Show All 11 Lines

#include "AMDGPU.h"		#include "AMDGPU.h"
#include "GCNSubtarget.h"		#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"		#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"		#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"		#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"		#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"		#include "llvm/IR/IntrinsicsR600.h"
		#include "llvm/IR/LLVMContext.h"
		#include "llvm/IR/MDBuilder.h"
		#include "llvm/IR/Metadata.h"
		#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"		#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"		#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"		#define DEBUG_TYPE "amdgpu-attributor"

namespace llvm {		namespace llvm {
void initializeCycleInfoWrapperPassPass(PassRegistry &);		void initializeCycleInfoWrapperPassPass(PassRegistry &);
}		}

using namespace llvm;		using namespace llvm;

		static cl::opt<unsigned> KernargPreloadCount(
		"amdgpu-kernarg-preload-count",
		cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,		#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {		enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"		#include "AMDGPUAttributes.def"
LAST_ARG_POS		LAST_ARG_POS
};		};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,		#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
▲ Show 20 Lines • Show All 870 Lines • ▼ Show 20 Lines

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,		AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
Attributor &A) {		Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)		if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);		return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");		llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}		}

		static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
		arsenmUnsubmitted Not Done Reply Inline Actions This isn't really gaining value by being in the attributor as it stands. Are you planning on a more sophisticated user analysis to select the starting point? arsenm: This isn't really gaining value by being in the attributor as it stands. Are you planning on a…
		kerbowaAuthorUnsubmitted Not Done Reply Inline Actions Yes, that's the plan. Any heuristics of that sort will be implemented here. kerbowa: Yes, that's the plan. Any heuristics of that sort will be implemented here.
		const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
		for (unsigned I = 0;
		arsenmUnsubmitted Not Done Reply Inline Actions Assuming 1 register : 1 argument which is not the case. Also the user SGPR count went up (in gfx10 I think?) so you should query the number for that arsenm: Assuming 1 register : 1 argument which is not the case. Also the user SGPR count went up (in…
		rampitecUnsubmitted Not Done Reply Inline Actions AFAIR it was increased to 32 in gfx9, then again was 16 in gfx10. I also do not think this is implemented. rampitec: AFAIR it was increased to 32 in gfx9, then again was 16 in gfx10. I also do not think this is…
		arsenmUnsubmitted Not Done Reply Inline Actions We already have getMaxNumUserSGPRs but it's just hardcoded arsenm: We already have getMaxNumUserSGPRs but it's just hardcoded
		kerbowaAuthorUnsubmitted Not Done Reply Inline Actions InReg is just a hint so being super accurate with the number of available SGPRs isn't needed for correctness here. The intent is just to have it stop at the maximum, but if it goes over it doesn't matter. I wasn't aware that the number of available user SGPRs had changed i.e. at https://llvm.org/docs/AMDGPUUsage.html I just see 16, I've only ever heard the number 16 talked about. Anyway, I will change this to getMaxNumUserSGPRs as I look into what's actually supported more. kerbowa: InReg is just a hint so being super accurate with the number of available SGPRs isn't needed…
		I < F.arg_size() &&
		I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
		++I) {
		Argument &Arg = *F.getArg(I);
		// Check for incompatible attributes.
		if (Arg.hasByRefAttr() \|\| Arg.hasNestAttr())
		arsenmUnsubmitted Done Reply Inline Actions Could actually handle byref (and should, ideally all kernargs would use byref). If we consistently used byref, you could implement this by moving memory values to the arg list arsenm: Could actually handle byref (and should, ideally all kernargs would use byref). If we…
		kerbowaAuthorUnsubmitted Done Reply Inline Actions I'm planning on adding it in a future revision but I don't think it's needed for this iteration of the changes. kerbowa: I'm planning on adding it in a future revision but I don't think it's needed for this iteration…
		break;

		Arg.addAttr(Attribute::InReg);
		}

		if (KernargPreloadCount > 0) {
		LLVM_DEBUG(dbgs() << "Kernel \"" << F.getName()
		<< "\" will attempt to preload " << KernargPreloadCount
		<< " kernel arguemnts.\n");
		arsenmUnsubmitted Done Reply Inline Actions Typo arguemnts arsenm: Typo arguemnts
		LLVMContext &Ctx = F.getParent()->getContext();
		MDBuilder MDB(Ctx);
		F.setMetadata("preload_kernel_args",
		arsenmUnsubmitted Done Reply Inline Actions I don't understand why you need this metadata. You mark the arguments with inreg which provides the same information? You can still ignore the hint in the codegen arsenm: I don't understand why you need this metadata. You mark the arguments with inreg which provides…
		kerbowaAuthorUnsubmitted Done Reply Inline Actions I'm adding the metadata early to ensure that kernarg preloading is constrained to those that are explicitly using the CL flag, in case there is some frontend that may sneak inreg onto some arguments. This will probably stay only until the feature is more mature or is enabled by default. kerbowa: I'm adding the metadata early to ensure that kernarg preloading is constrained to those that…
		arsenmUnsubmitted Not Done Reply Inline Actions This is an anti feature. Just respect inreg regardless of the source. You're just adding a huge source of fragility just in case a frontend tries to make use of an in progress feature arsenm: This is an anti feature. Just respect inreg regardless of the source. You're just adding a huge…
		MDNode::get(Ctx, MDB.createConstant(llvm::ConstantInt::get(
		IntegerType::getInt32Ty(Ctx),
		KernargPreloadCount))));
		}
		}

class AMDGPUAttributor : public ModulePass {		class AMDGPUAttributor : public ModulePass {
public:		public:
AMDGPUAttributor() : ModulePass(ID) {}		AMDGPUAttributor() : ModulePass(ID) {}

/// doInitialization - Virtual method overridden by subclasses to do		/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary initialization before any pass is run.		/// any necessary initialization before any pass is run.
bool doInitialization(Module &) override {		bool doInitialization(Module &) override {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();		auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
Show All 30 Lines	bool runOnModule(Module &M) override {
};		};

Attributor A(Functions, InfoCache, AC);		Attributor A(Functions, InfoCache, AC);

for (Function &F : M) {		for (Function &F : M) {
if (!F.isIntrinsic()) {		if (!F.isIntrinsic()) {
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));		A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));		A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {		CallingConv::ID CC = F.getCallingConv();
		if (!AMDGPU::isEntryFunctionCC(CC)) {
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));		A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));		A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
		} else if (CC == CallingConv::AMDGPU_KERNEL) {
		arsenmUnsubmitted Done Reply Inline Actions Don't need the arg_empty check, the implementation handles it anyway and it's uncommon arsenm: Don't need the arg_empty check, the implementation handles it anyway and it's uncommon
		addPreloadKernArgHint(F, *TM);
}		}
}		}
}		}

ChangeStatus Change = A.run();		ChangeStatus Change = A.run();
return Change == ChangeStatus::CHANGED;		return Change == ChangeStatus::CHANGED;
}		}

Show All 18 Lines

llvm/test/CodeGen/AMDGPU/preload-kernal-args-inreg-hints.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
				; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-attributor -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
				; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-1 %s
				arsenmUnsubmitted Not Done Reply Inline Actions Use -passes arsenm: Use -passes
				kerbowaAuthorUnsubmitted Not Done Reply Inline Actions I think it won't work for amdgpu-attributor with old pass manager? kerbowa: I think it won't work for amdgpu-attributor with old pass manager?
				arsenmUnsubmitted Not Done Reply Inline Actions Apparently it hasn't been ported yet arsenm: Apparently it hasn't been ported yet
				; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=3 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-3 %s
				; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=16 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-16 %s
				; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=20 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-20 %s

				define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
				; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
				; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
				; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
				; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1
				; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
				; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
				; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
				; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
				; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2
				; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float> %2, ptr %3) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
				; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
				; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
				; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
				; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4
				; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x float> %2, ptr %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
				; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
				; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
				; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
				; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18
				; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
				; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
				; PRELOAD-1-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
				; PRELOAD-3-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
				; PRELOAD-16-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2
				; PRELOAD-20-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
				; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
				; NO-PRELOAD-NEXT: call void @func(ptr [[TMP0]])
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
				; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: call void @func(ptr [[TMP0]])
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
				; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: call void @func(ptr [[TMP0]])
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
				; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: call void @func(ptr [[TMP0]])
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func
				; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: call void @func(ptr [[TMP0]])
				; PRELOAD-20-NEXT: ret void
				;
				call void @func(ptr %0)
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
				; NO-PRELOAD-SAME: (i16 [[TMP0:%.*]]) #[[ATTR2]] {
				; NO-PRELOAD-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
				; PRELOAD-1-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
				; PRELOAD-3-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
				; PRELOAD-16-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic
				; PRELOAD-20-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]])
				; PRELOAD-20-NEXT: ret void
				;
				call void @llvm.amdgcn.set.prio(i16 %0)
				ret void
				}

				define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
				; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
				; PRELOAD-1-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
				; PRELOAD-3-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
				; PRELOAD-16-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc
				; PRELOAD-20-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_2_preexisting(i32 inreg %0, i64 %1) #0 {
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
				; NO-PRELOAD-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
				; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
				; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
				; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting
				; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				define amdgpu_kernel void @test_preload_hint_kernel_incompatible_attributes(ptr addrspace(4) byref(i32) %0, ptr nest %1) {
				arsenmUnsubmitted Not Done Reply Inline Actions Needs some exotic types. i8, <2 x half>, <3 x half>, odd bit sized scalars, odd bit sized element vectors, odd vectors, structs, arrays and nested structs arsenm: Needs some exotic types. i8, <2 x half>, <3 x half> odd bit sized scalars, odd bit sized…
				arsenmUnsubmitted Not Done Reply Inline Actions We could probably get away with making the verifier reject aggregate arguments for amdgpu_kernel arsenm: We could probably get away with making the verifier reject aggregate arguments for amdgpu_kernel
				kerbowaAuthorUnsubmitted Not Done Reply Inline Actions Kind of unnecessary here since none of that impacts the addition of inreg at this point, it will just reject preloading the argument later. kerbowa: Kind of unnecessary here since none of that impacts the addition of inreg at this point, it…
				; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
				; NO-PRELOAD-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
				; NO-PRELOAD-NEXT: ret void
				;
				; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
				; PRELOAD-1-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-1-NEXT: ret void
				;
				; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
				; PRELOAD-3-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-3-NEXT: ret void
				;
				; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
				; PRELOAD-16-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-16-NEXT: ret void
				;
				; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes
				; PRELOAD-20-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 {
				; PRELOAD-20-NEXT: ret void
				;
				ret void
				}

				declare void @func(ptr) #0
				declare void @llvm.amdgcn.set.prio(i16)

				attributes #0 = { nounwind }
				;.
				; NO-PRELOAD: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; NO-PRELOAD: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; NO-PRELOAD: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; NO-PRELOAD: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; NO-PRELOAD: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; NO-PRELOAD: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" }
				;.
				; PRELOAD-1: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-1: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-1: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-1: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-1: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-1: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" }
				;.
				; PRELOAD-3: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-3: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-3: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-3: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-3: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-3: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" }
				;.
				; PRELOAD-16: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-16: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-16: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-16: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-16: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-16: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" }
				;.
				; PRELOAD-20: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-20: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-20: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-20: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-20: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
				; PRELOAD-20: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" }
				;.
				; PRELOAD-1: [[META0:![0-9]+]] = !{i32 1}
				;.
				; PRELOAD-3: [[META0:![0-9]+]] = !{i32 3}
				;.
				; PRELOAD-16: [[META0:![0-9]+]] = !{i32 16}
				;.
				; PRELOAD-20: [[META0:![0-9]+]] = !{i32 20}
				;.

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Use inreg for hint to preload kernel arguments
ClosedPublic

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 552570

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

llvm/test/CodeGen/AMDGPU/preload-kernal-args-inreg-hints.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Use inreg for hint to preload kernel arguments
ClosedPublic

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 552570

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

llvm/test/CodeGen/AMDGPU/preload-kernal-args-inreg-hints.ll

[AMDGPU] Use inreg for hint to preload kernel arguments
ClosedPublic