diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -17,6 +17,10 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 
@@ -28,6 +32,10 @@
 
 using namespace llvm;
 
+static cl::opt<unsigned> KernargPreloadCount(
+    "amdgpu-kernarg-preload-count",
+    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
+
 #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
 
 enum ImplicitArgumentPositions {
@@ -914,6 +922,44 @@
   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
 }
 
+/// Annotate kernel \p F with hints for preloading its leading arguments
+/// into SGPRs, as requested through -amdgpu-kernarg-preload-count.
+///
+/// At most min(KernargPreloadCount, ST.getMaxNumUserSGPRs()) leading
+/// arguments are marked `inreg`; marking stops at the first argument that
+/// carries `byref` or `nest`. Whenever the option is non-zero, the requested
+/// count (not the number of arguments actually marked) is also recorded on
+/// \p F as !preload_kernel_args metadata.
+static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
+  const unsigned RequestedCount = KernargPreloadCount;
+  // With the option at zero the loop bound below is zero and no metadata is
+  // emitted, so there is nothing to do.
+  if (RequestedCount == 0)
+    return;
+
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+  // The bound is loop-invariant; compute it once instead of per iteration.
+  const unsigned MaxPreload =
+      std::min(RequestedCount, ST.getMaxNumUserSGPRs());
+  for (unsigned I = 0, E = F.arg_size(); I != E && I != MaxPreload; ++I) {
+    Argument &Arg = *F.getArg(I);
+    // Check for incompatible attributes.
+    if (Arg.hasByRefAttr() || Arg.hasNestAttr())
+      break;
+
+    Arg.addAttr(Attribute::InReg);
+  }
+
+  LLVM_DEBUG(dbgs() << "Kernel \"" << F.getName()
+                    << "\" will attempt to preload " << RequestedCount
+                    << " kernel arguments.\n");
+  LLVMContext &Ctx = F.getContext();
+  MDBuilder MDB(Ctx);
+  F.setMetadata("preload_kernel_args",
+                MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+                                     Type::getInt32Ty(Ctx), RequestedCount))));
+}
+
 class AMDGPUAttributor : public ModulePass {
 public:
   AMDGPUAttributor() : ModulePass(ID) {}
@@ -960,9 +1006,12 @@
       if (!F.isIntrinsic()) {
         A.getOrCreateAAFor(IRPosition::function(F));
         A.getOrCreateAAFor(IRPosition::function(F));
-        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
+        CallingConv::ID CC = F.getCallingConv();
+        if (!AMDGPU::isEntryFunctionCC(CC)) {
           A.getOrCreateAAFor(IRPosition::function(F));
           A.getOrCreateAAFor(IRPosition::function(F));
+        } else if (CC == CallingConv::AMDGPU_KERNEL) {
+          addPreloadKernArgHint(F, *TM);
         }
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernal-args-inreg-hints.ll b/llvm/test/CodeGen/AMDGPU/preload-kernal-args-inreg-hints.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernal-args-inreg-hints.ll
@@ -0,0 +1,307 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-attributor -S < %s | FileCheck -check-prefix=NO-PRELOAD %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-1 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=3 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-3 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=16 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-16 %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=20 -amdgpu-attributor -S < %s | FileCheck -check-prefix=PRELOAD-20 %s + +define amdgpu_kernel void @test_preload_hint_kernel_1(ptr %0) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1 +; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1 +; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1 +; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1 +; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1 +; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_2(i32 %0, i64 %1) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2 +; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2 +; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2 +; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2 +; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define 
{{[^@]+}}@test_preload_hint_kernel_2 +; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_4(i32 %0, i64 %1, <2 x float> %2, ptr %3) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4 +; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4 +; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4 +; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4 +; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_4 +; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_18(i32 %0, i64 %1, <2 x float> %2, ptr %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18 +; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 
[[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18 +; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]], <2 x float> [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18 +; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr [[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], i32 [[TMP6:%.*]], i32 [[TMP7:%.*]], i32 [[TMP8:%.*]], i32 [[TMP9:%.*]], i32 [[TMP10:%.*]], i32 [[TMP11:%.*]], i32 [[TMP12:%.*]], i32 [[TMP13:%.*]], i32 [[TMP14:%.*]], i32 [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18 +; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_18 +; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]], <2 x float> inreg [[TMP2:%.*]], 
ptr inreg [[TMP3:%.*]], i32 inreg [[TMP4:%.*]], i32 inreg [[TMP5:%.*]], i32 inreg [[TMP6:%.*]], i32 inreg [[TMP7:%.*]], i32 inreg [[TMP8:%.*]], i32 inreg [[TMP9:%.*]], i32 inreg [[TMP10:%.*]], i32 inreg [[TMP11:%.*]], i32 inreg [[TMP12:%.*]], i32 inreg [[TMP13:%.*]], i32 inreg [[TMP14:%.*]], i32 inreg [[TMP15:%.*]], i32 [[TMP16:%.*]], i32 [[TMP17:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define void @test_preload_hint_non_kernel_2(i32 %0, i64 %1) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2 +; NO-PRELOAD-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2 +; PRELOAD-1-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2 +; PRELOAD-3-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2 +; PRELOAD-16-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_non_kernel_2 +; PRELOAD-20-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_1_call_func(ptr %0) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func +; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { +; NO-PRELOAD-NEXT: call void @func(ptr [[TMP0]]) +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func +; PRELOAD-1-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: call void @func(ptr [[TMP0]]) +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define 
{{[^@]+}}@test_preload_hint_kernel_1_call_func +; PRELOAD-3-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: call void @func(ptr [[TMP0]]) +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func +; PRELOAD-16-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: call void @func(ptr [[TMP0]]) +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_func +; PRELOAD-20-SAME: (ptr inreg [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: call void @func(ptr [[TMP0]]) +; PRELOAD-20-NEXT: ret void +; + call void @func(ptr %0) + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_1_call_intrinsic(i16 %0) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic +; NO-PRELOAD-SAME: (i16 [[TMP0:%.*]]) #[[ATTR2]] { +; NO-PRELOAD-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]]) +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic +; PRELOAD-1-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]]) +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic +; PRELOAD-3-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]]) +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic +; PRELOAD-16-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]]) +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_call_intrinsic +; PRELOAD-20-SAME: (i16 inreg [[TMP0:%.*]]) #[[ATTR2]] !preload_kernel_args !0 { +; 
PRELOAD-20-NEXT: call void @llvm.amdgcn.set.prio(i16 [[TMP0]]) +; PRELOAD-20-NEXT: ret void +; + call void @llvm.amdgcn.set.prio(i16 %0) + ret void +} + +define spir_kernel void @test_preload_hint_kernel_1_spir_cc(ptr %0) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc +; NO-PRELOAD-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc +; PRELOAD-1-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc +; PRELOAD-3-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc +; PRELOAD-16-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_1_spir_cc +; PRELOAD-20-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_2_preexisting(i32 inreg %0, i64 %1) #0 { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting +; NO-PRELOAD-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting +; PRELOAD-1-SAME: (i32 inreg [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting +; PRELOAD-3-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_2_preexisting +; PRELOAD-16-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define 
{{[^@]+}}@test_preload_hint_kernel_2_preexisting +; PRELOAD-20-SAME: (i32 inreg [[TMP0:%.*]], i64 inreg [[TMP1:%.*]]) #[[ATTR0]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @test_preload_hint_kernel_incompatible_attributes(ptr addrspace(4) byref(i32) %0, ptr nest %1) { +; NO-PRELOAD-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes +; NO-PRELOAD-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +; NO-PRELOAD-NEXT: ret void +; +; PRELOAD-1-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes +; PRELOAD-1-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-1-NEXT: ret void +; +; PRELOAD-3-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes +; PRELOAD-3-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-3-NEXT: ret void +; +; PRELOAD-16-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes +; PRELOAD-16-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-16-NEXT: ret void +; +; PRELOAD-20-LABEL: define {{[^@]+}}@test_preload_hint_kernel_incompatible_attributes +; PRELOAD-20-SAME: (ptr addrspace(4) byref(i32) [[TMP0:%.*]], ptr nest [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] !preload_kernel_args !0 { +; PRELOAD-20-NEXT: ret void +; + ret void +} + +declare void @func(ptr) #0 +declare void @llvm.amdgcn.set.prio(i16) + +attributes #0 = { nounwind } +;. 
+; NO-PRELOAD: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; NO-PRELOAD: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; NO-PRELOAD: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; NO-PRELOAD: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; NO-PRELOAD: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; NO-PRELOAD: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" } +;. 
+; PRELOAD-1: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-1: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-1: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-1: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-1: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-1: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" } +;. 
+; PRELOAD-3: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-3: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-3: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-3: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-3: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-3: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" } +;. 
+; PRELOAD-16: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-16: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-16: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-16: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-16: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-16: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" } +;. 
+; PRELOAD-20: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-20: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-20: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-20: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-20: attributes #[[ATTR4:[0-9]+]] = { nounwind "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; PRELOAD-20: attributes #[[ATTR5:[0-9]+]] = { "target-cpu"="gfx90a" } +;. +; PRELOAD-1: [[META0:![0-9]+]] = !{i32 1} +;. 
+; PRELOAD-3: [[META0:![0-9]+]] = !{i32 3} +;. +; PRELOAD-16: [[META0:![0-9]+]] = !{i32 16} +;. +; PRELOAD-20: [[META0:![0-9]+]] = !{i32 20} +;.