diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -566,9 +566,6 @@ "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr, llvm::GlobalVariable::NotThreadLocal); addCompilerUsedGlobal(Var); - if (!getModule().getModuleFlag("amdgpu_hostcall")) { - getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); - } } // Emit amdgpu_code_object_version module flag, which is code object version // times 100. diff --git a/clang/test/CodeGenCUDA/amdgpu-asan-printf.cu b/clang/test/CodeGenCUDA/amdgpu-asan-printf.cu deleted file mode 100644 --- a/clang/test/CodeGenCUDA/amdgpu-asan-printf.cu +++ /dev/null @@ -1,17 +0,0 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ -// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \ -// RUN: -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s - -// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1} - -// Test to check hostcall module flag metadata is generated correctly -// when a program has printf call and compiled with -fsanitize=address. -#include "Inputs/cuda.h" -__device__ void non_kernel() { - printf("sanitized device function"); -} - -__global__ void kernel() { - non_kernel(); -} - diff --git a/clang/test/CodeGenCUDA/amdgpu-asan.cu b/clang/test/CodeGenCUDA/amdgpu-asan.cu --- a/clang/test/CodeGenCUDA/amdgpu-asan.cu +++ b/clang/test/CodeGenCUDA/amdgpu-asan.cu @@ -9,12 +9,12 @@ // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \ // RUN: -mlink-bitcode-file %t.asanrtl.bc -x hip \ -// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s +// RUN: | FileCheck -check-prefixes=ASAN %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \ // RUN: -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \ -// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s +// RUN: | FileCheck -check-prefixes=ASAN %s // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \ // RUN: -fcuda-is-device -target-cpu gfx906 -x hip \ @@ -27,7 +27,5 @@ // ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr // ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}}) -// MFCHECK: !{{.*}} = !{i32 4, !"amdgpu_hostcall", i32 1} - // CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions // CHECK-NOT: @__asan_report_load1 diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -18,6 +18,7 @@ AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr") AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id") AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr") +AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr") AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x") AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y") AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z") diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -12,6 +12,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -102,7 +103,7 @@ /// Returns true if the function requires the implicit argument be passed /// regardless of the function contents. -static bool funcRequiresImplicitArgPtr(const Function &F) { +static bool funcRequiresHostcallPtr(const Function &F) { // Sanitizers require the hostcall buffer passed in the implicit arguments. return F.hasFnAttribute(Attribute::SanitizeAddress) || F.hasFnAttribute(Attribute::SanitizeThread) || @@ -341,12 +342,15 @@ // If the function requires the implicit arg pointer due to sanitizers, // assume it's needed even if explicitly marked as not requiring it. - const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F); - if (NeedsImplicit) + const bool NeedsHostcall = funcRequiresHostcallPtr(*F); + if (NeedsHostcall) { removeAssumedBits(IMPLICIT_ARG_PTR); + removeAssumedBits(HOSTCALL_PTR); + } for (auto Attr : ImplicitAttrs) { - if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR) + if (NeedsHostcall && + (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) continue; if (F->hasFnAttribute(Attr.second)) @@ -405,6 +409,11 @@ removeAssumedBits(QUEUE_PTR); } + if (funcRetrievesHostcallPtr(A)) { + removeAssumedBits(IMPLICIT_ARG_PTR); + removeAssumedBits(HOSTCALL_PTR); + } + return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -486,6 +495,35 @@ return false; } + + bool funcRetrievesHostcallPtr(Attributor &A) { + auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(); + + // Check if this is a call to the implicitarg_ptr builtin and it + // is used to retrieve the hostcall pointer. The implicit arg for + // hostcall is not used only if every use of the implicitarg_ptr + // is a load that clearly does not retrieve any byte of the + // hostcall pointer. We check this by tracing all the uses of the + // initial call to the implicitarg_ptr intrinsic. + auto DoesNotLeadToHostcallPtr = [&](Instruction &I) { + auto &Call = cast(I); + if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr) + return true; + + const auto &PointerInfoAA = A.getAAFor( + *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); + + AAPointerInfo::OffsetAndSize OAS(Pos, 8); + return PointerInfoAA.forallInterferingAccesses( + OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) { + return Acc.getRemoteInst()->isDroppable(); + }); + }; + + bool UsedAssumedInformation = false; + return !A.checkForAllCallLikeInstructions(DoesNotLeadToHostcallPtr, *this, + UsedAssumedInformation); + } }; AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, @@ -638,7 +676,7 @@ AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); DenseSet Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, - &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID}); + &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID}); Attributor A(Functions, InfoCache, CGUpdater, &Allowed); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -405,7 +405,7 @@ if (HiddenArgNumBytes >= 32) { if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); - else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { + else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { // The printf runtime binding pass should have ensured that hostcall and // printf are not used in the same module. assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); @@ -794,6 +794,7 @@ msgpack::ArrayDocNode Args) { auto &Func = MF.getFunction(); const GCNSubtarget &ST = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); if (!HiddenArgNumBytes) @@ -822,7 +823,7 @@ if (M->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - else if (M->getModuleFlag("amdgpu_hostcall")) { + else if (MFI.hasHostcallPtr()) { // The printf runtime binding pass should have ensured that hostcall and // printf are not used in the same module. assert(!M->getNamedMetadata("llvm.printf.fmts")); @@ -973,6 +974,7 @@ const GCNSubtarget &ST = MF.getSubtarget(); const Module *M = Func.getParent(); auto &DL = M->getDataLayout(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); auto Int64Ty = Type::getInt64Ty(Func.getContext()); auto Int32Ty = Type::getInt32Ty(Func.getContext()); @@ -1011,7 +1013,7 @@ } else Offset += 8; // Skipped. - if (M->getModuleFlag("amdgpu_hostcall")) { + if (MFI.hasHostcallPtr()) { emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); } else @@ -1041,7 +1043,6 @@ } else Offset += 8; // Skipped. - const SIMachineFunctionInfo &MFI = *MF.getInfo(); if (MFI.hasQueuePtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -421,6 +421,7 @@ // Pointer to where the ABI inserts special kernel arguments separate from the // user arguments. This is an offset from the KernargSegmentPtr. bool ImplicitArgPtr : 1; + bool HostcallPtr : 1; bool MayNeedAGPRs : 1; @@ -696,6 +697,10 @@ return ImplicitArgPtr; } + bool hasHostcallPtr() const { + return HostcallPtr; + } + bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -47,6 +47,7 @@ WorkItemIDZ(false), ImplicitBufferPtr(false), ImplicitArgPtr(false), + HostcallPtr(false), GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0), GDSSize(0) { @@ -141,6 +142,9 @@ if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) DispatchID = true; + + if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr")) + HostcallPtr = true; } // FIXME: This attribute is a hack, we just need an analysis on the function diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -54,6 +54,9 @@ /// false otherwise. bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); +/// \returns The offset of the hostcall pointer argument from implicitarg_ptr +unsigned getHostcallImplicitArgPosition(); + struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -136,6 +136,22 @@ isHsaAbiVersion5(STI); } +// FIXME: All such magic numbers about the ABI should be in a +// central TD file. +unsigned getHostcallImplicitArgPosition() { + switch (AmdhsaCodeObjectVersion) { + case 2: + case 3: + case 4: + return 24; + case 5: + return 80; + default: + llvm_unreachable("Unexpected code object version"); + return 0; + } +} + #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp --- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp +++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp @@ -50,9 +50,6 @@ auto Int64Ty = Builder.getInt64Ty(); auto M = Builder.GetInsertBlock()->getModule(); auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); - if (!M->getModuleFlag("amdgpu_hostcall")) { - M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); - } return Builder.CreateCall(Fn, Version); } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -230,6 +230,6 @@ ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -837,7 +837,7 @@ ret float %fadd } -; Implicit arguments need to be enabled for sanitizers +; Hostcall needs to be enabled for sanitizers define amdgpu_kernel void @kern_sanitize_address() #4 { ; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address ; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] { @@ -853,7 +853,7 @@ ret void } -; Implicit arguments need to be enabled for sanitizers +; Hostcall needs to be enabled for sanitizers define void @func_sanitize_address() #4 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address ; AKF_HSA-SAME: () #[[ATTR5]] { @@ -869,7 +869,7 @@ ret void } -; Implicit arguments need to be enabled for sanitizers +; Hostcall needs to be enabled for sanitizers define void @func_indirect_sanitize_address() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address ; AKF_HSA-SAME: () #[[ATTR3]] { @@ -885,7 +885,7 @@ ret void } -; Implicit arguments need to be enabled for sanitizers +; Hostcall needs to be enabled for sanitizers define amdgpu_kernel void @kern_indirect_sanitize_address() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address ; AKF_HSA-SAME: () #[[ATTR4]] { @@ -937,22 +937,22 @@ ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -647,15 +647,15 @@ ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -418,13 +418,13 @@ ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. ; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -35,6 +35,6 @@ ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -42,6 +42,6 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll @@ -50,7 +50,7 @@ ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 32 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_kind: hidden_hostcall_buffer ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 40 ; CHECK-NEXT: .size: 8 diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll @@ -26,6 +26,9 @@ ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenHostcallBuffer ; CHECK-NOT: ValueKind: HiddenDefaultQueue ; CHECK-NOT: ValueKind: HiddenCompletionAction define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0 @@ -56,7 +59,7 @@ ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueKind: HiddenHostcallBuffer ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -171,7 +171,7 @@ ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 48 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_kind: hidden_hostcall_buffer ; CHECK: .name: test32 ; CHECK: .symbol: test32.kd define amdgpu_kernel void @test32( @@ -214,7 +214,7 @@ ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 48 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_kind: hidden_hostcall_buffer ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 56 ; CHECK-NEXT: .size: 8 @@ -265,7 +265,7 @@ ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 48 ; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_kind: hidden_hostcall_buffer ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .offset: 56 ; CHECK-NEXT: .size: 8 diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll @@ -112,10 +112,8 @@ ret void } -!llvm.module.flags = !{!0} !llvm.printf.fmts = !{!1, !2} -!0 = !{i32 1, !"amdgpu_hostcall", i32 1} !1 = !{!"1:1:4:%d\5Cn"} !2 = !{!"2:1:8:%g\5Cn"} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll @@ -177,7 +177,7 @@ ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueKind: HiddenHostcallBuffer ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: CodeProps: define amdgpu_kernel void @test32( @@ -221,7 +221,7 @@ ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueKind: HiddenHostcallBuffer ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 @@ -273,7 +273,7 @@ ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueKind: HiddenHostcallBuffer ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK: --- -; CHECK: amdhsa.kernels: -; CHECK: - .args: -; CHECK-NEXT: - .name: a -; CHECK-NEXT: .offset: 0 -; CHECK-NEXT: .size: 1 -; CHECK-NEXT: .type_name: char -; CHECK-NEXT: .value_kind: by_value -; CHECK-NEXT: - .offset: 8 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_x -; CHECK-NEXT: - .offset: 16 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_y -; CHECK-NEXT: - .offset: 24 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_z - -; CHECK-NOT: .value_kind: hidden_hostcall_buffer - -; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: -; CHECK-NEXT: - 2 -; CHECK-NEXT: - 0 -; CHECK: .name: test_kernel -; CHECK: .symbol: test_kernel.kd - -define amdgpu_kernel void @test_kernel(i8 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: amdhsa.version: -; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 - -attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"char"} -!4 = !{!""} - -!opencl.ocl.version = !{!90} -!90 = !{i32 2, i32 0} - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: Kernels: - -; CHECK: - Name: test_kernel -; CHECK-NEXT: SymbolName: 'test_kernel@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NOT: ValueKind: HiddenHostcallBuffer -; CHECK-NOT: ValueKind: HiddenDefaultQueue -; CHECK-NOT: ValueKind: HiddenCompletionAction - -define amdgpu_kernel void @test_kernel(i8 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"char"} -!4 = !{!""} - -!opencl.ocl.version = !{!90} -!90 = !{i32 2, i32 0} - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll @@ -48,7 +48,4 @@ !opencl.ocl.version = !{!90} !90 = !{i32 2, i32 0} -!llvm.module.flags = !{!0} -!0 = !{i32 4, !"amdgpu_hostcall", i32 1} - ; CHECK: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK: --- -; CHECK: amdhsa.kernels: -; CHECK: - .args: -; CHECK-NEXT: - .name: a -; CHECK-NEXT: .offset: 0 -; CHECK-NEXT: .size: 1 -; CHECK-NEXT: .type_name: char -; CHECK-NEXT: .value_kind: by_value -; CHECK-NEXT: - .offset: 8 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_x -; CHECK-NEXT: - .offset: 16 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_y -; CHECK-NEXT: - .offset: 24 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_global_offset_z -; CHECK-NEXT: - .address_space: global -; CHECK-NEXT: .offset: 32 -; CHECK-NEXT: .size: 8 -; CHECK-NEXT: .value_kind: hidden_hostcall_buffer -; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: -; CHECK-NEXT: - 2 -; CHECK-NEXT: - 0 -; CHECK: .name: test_kernel -; CHECK: .symbol: test_kernel.kd - -define amdgpu_kernel void @test_kernel(i8 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -; CHECK: amdhsa.version: -; CHECK-NEXT: - 1 -; CHECK-NEXT: - 0 - -attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"char"} -!4 = !{!""} - -!opencl.ocl.version = !{!90} -!90 = !{i32 2, i32 0} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdgpu_hostcall", i32 1} - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] -; CHECK: Kernels: - -; CHECK: - Name: test_kernel -; CHECK-NEXT: SymbolName: 'test_kernel@kd' -; CHECK-NEXT: Language: OpenCL C -; CHECK-NEXT: LanguageVersion: [ 2, 0 ] -; CHECK-NEXT: Args: -; CHECK-NEXT: - Name: a -; CHECK-NEXT: TypeName: char -; CHECK-NEXT: Size: 1 -; CHECK-NEXT: Align: 1 -; CHECK-NEXT: ValueKind: ByValue -; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ -; CHECK-NEXT: - Size: 8 -; CHECK-NEXT: Align: 8 -; CHECK-NEXT: ValueKind: HiddenHostcallBuffer -; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NOT: ValueKind: HiddenDefaultQueue -; CHECK-NOT: ValueKind: HiddenCompletionAction - -declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) - -define amdgpu_kernel void @test_kernel(i8 %a) #0 - !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 - !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { - ret void -} - -attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" } - -!1 = !{i32 0} -!2 = !{!"none"} -!3 = !{!"char"} -!4 = !{!""} - -!opencl.ocl.version = !{!90} -!90 = !{i32 2, i32 0} - -; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll @@ -0,0 +1,303 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s + +declare void @function1() + +declare void @function2() #0 + +; Function Attrs: noinline +define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #4 { + store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8 + ret void +} + +; Function Attrs: noinline +define void @function4(i64 %arg, i64* %a) #4 { + store i64 %arg, i64* %a + ret void +} + +; Function Attrs: noinline +define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #4 { + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 8 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %sink + ret void +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1 + +; CHECK: amdhsa.kernels: +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel10 +define amdgpu_kernel void @test_kernel10(i8* %a) #2 { + store i8 3, i8* %a, align 1 + ret void +} + +; Call to an extern function + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel20 +define amdgpu_kernel void @test_kernel20(i8* %a) #2 { + call void @function1() + store i8 3, i8* %a, align 1 + ret void +} + +; Explicit attribute on kernel + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel21 +define amdgpu_kernel void @test_kernel21(i8* %a) #3 { + call void @function1() + store i8 3, i8* %a, align 1 + ret void +} + +; Explicit attribute on extern callee + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel22 +define amdgpu_kernel void @test_kernel22(i8* %a) #2 { + call void @function2() + store i8 3, i8* %a, align 1 + ret void +} + +; Access more bytes than the pointer size + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel30 +define amdgpu_kernel void @test_kernel30(i128* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)* + %x = load i128, i128 addrspace(4)* %cast + store i128 %x, i128* %a + ret void +} + +; Typical load of hostcall buffer pointer + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel40 +define amdgpu_kernel void @test_kernel40(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Typical usage, overriden by explicit attribute on kernel + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel41 +define amdgpu_kernel void @test_kernel41(i64* %a) #3 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Access to implicit arg before the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel42 +define amdgpu_kernel void @test_kernel42(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Access to implicit arg after the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel43 +define amdgpu_kernel void @test_kernel43(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Accessing a byte just before the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel44 +define amdgpu_kernel void @test_kernel44(i8* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 23 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Accessing a byte inside the hostcall pointer + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel45 +define amdgpu_kernel void @test_kernel45(i8* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Accessing a byte inside the hostcall pointer + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel46 +define amdgpu_kernel void @test_kernel46(i8* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 31 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Accessing a byte just after the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel47 +define amdgpu_kernel void @test_kernel47(i8* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Access with an unknown offset + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel50 +define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Multiple geps reaching the hostcall pointer argument. + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel51 +define amdgpu_kernel void @test_kernel51(i8* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 8 + %x = load i8, i8 addrspace(4)* %gep2, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Multiple geps not reaching the hostcall pointer argument. + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel52 +define amdgpu_kernel void @test_kernel52(i8* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16 + %x = load i8, i8 addrspace(4)* %gep2, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Hostcall pointer used inside a function call + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel60 +define amdgpu_kernel void @test_kernel60(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + call void @function4(i64 %x, i64* %a) + ret void +} + +; Hostcall pointer retrieved inside a function call; chain of geps + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel61 +define amdgpu_kernel void @test_kernel61(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + call void @function5(i8 addrspace(4)* %gep, i64* %a) + ret void +} + +; Pointer captured + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel70 +define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42 + store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8 + ret void +} + +; Pointer captured inside function call + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel71 +define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42 + call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink) + ret void +} + +; Ineffective pointer capture + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel72 +define amdgpu_kernel void @test_kernel72() #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42 + store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8 + ret void +} + +attributes #0 = { "amdgpu-no-hostcall-ptr" } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" } +attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" } +attributes #4 = { noinline } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll @@ -0,0 +1,301 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s + +declare void @function1() + +declare void @function2() #0 + +; Function Attrs: noinline +define void @function3(i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink) #2 { + store i8 addrspace(4)* %argptr, i8 addrspace(4)* addrspace(1)* %sink, align 8 + ret void +} + +; Function Attrs: noinline +define void @function4(i64 %arg, i64* %a) #2 { + store i64 %arg, i64* %a + ret void +} + +; Function Attrs: noinline +define void @function5(i8 addrspace(4)* %ptr, i64* %sink) #2 { + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 64 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %sink + ret void +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1 + +; CHECK: amdhsa.kernels: +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel10 +define amdgpu_kernel void @test_kernel10(i8* %a) { + store i8 3, i8* %a, align 1 + ret void +} + +; Call to an extern function + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel20 +define amdgpu_kernel void @test_kernel20(i8* %a) { + call void @function1() + store i8 3, i8* %a, align 1 + ret void +} + +; Explicit attribute on kernel + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel21 +define amdgpu_kernel void @test_kernel21(i8* %a) #0 { + call void @function1() + store i8 3, i8* %a, align 1 + ret void +} + +; Explicit attribute on extern callee + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel22 +define amdgpu_kernel void @test_kernel22(i8* %a) { + call void @function2() + store i8 3, i8* %a, align 1 + ret void +} + +; Access more bytes than the pointer size + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel30 +define amdgpu_kernel void @test_kernel30(i128* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72 + %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)* + %x = load i128, i128 addrspace(4)* %cast + store i128 %x, i128* %a + ret void +} + +; Typical load of hostcall buffer pointer + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel40 +define amdgpu_kernel void @test_kernel40(i64* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Typical usage, overriden by explicit attribute on kernel + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel41 +define amdgpu_kernel void @test_kernel41(i64* %a) #0 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Access to implicit arg before the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel42 +define amdgpu_kernel void @test_kernel42(i64* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Access to implicit arg after the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel43 +define amdgpu_kernel void @test_kernel43(i64* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + store i64 %x, i64* %a + ret void +} + +; Accessing a byte just before the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel44 +define amdgpu_kernel void @test_kernel44(i8* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 79 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Accessing a byte inside the hostcall pointer + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel45 +define amdgpu_kernel void @test_kernel45(i8* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Accessing a byte inside the hostcall pointer + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel46 +define amdgpu_kernel void @test_kernel46(i8* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 87 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Accessing a byte just after the hostcall pointer + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel47 +define amdgpu_kernel void @test_kernel47(i8* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88 + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Access with an unknown offset + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel50 +define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b + %x = load i8, i8 addrspace(4)* %gep, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Multiple geps reaching the hostcall pointer argument. + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel51 +define amdgpu_kernel void @test_kernel51(i8* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 64 + %x = load i8, i8 addrspace(4)* %gep2, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Multiple geps not reaching the hostcall pointer argument. + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel52 +define amdgpu_kernel void @test_kernel52(i8* %a) { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16 + %x = load i8, i8 addrspace(4)* %gep2, align 1 + store i8 %x, i8* %a, align 1 + ret void +} + +; Hostcall pointer used inside a function call + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel60 +define amdgpu_kernel void @test_kernel60(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80 + %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)* + %x = load i64, i64 addrspace(4)* %cast + call void @function4(i64 %x, i64* %a) + ret void +} + +; Hostcall pointer retrieved inside a function call; chain of geps + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel61 +define amdgpu_kernel void @test_kernel61(i64* %a) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16 + call void @function5(i8 addrspace(4)* %gep, i64* %a) + ret void +} + +; Pointer captured + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel70 +define amdgpu_kernel void @test_kernel70(i8 addrspace(4)* addrspace(1)* %sink) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42 + store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink, align 8 + ret void +} + +; Pointer captured inside function call + +; CHECK: - .args: +; CHECK: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel71 +define amdgpu_kernel void @test_kernel71(i8 addrspace(4)* addrspace(1)* %sink) #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42 + call void @function3(i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* %sink) + ret void +} + +; Ineffective pointer capture + +; CHECK: - .args: +; CHECK-NOT: hidden_hostcall_buffer +; CHECK-LABEL: .name: test_kernel72 +define amdgpu_kernel void @test_kernel72() #2 { + %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42 + store i8 addrspace(4)* %gep, i8 addrspace(4)* addrspace(1)* undef, align 8 + ret void +} + +attributes #0 = { "amdgpu-no-hostcall-ptr" } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { noinline } diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll --- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll @@ -202,13 +202,13 @@ attributes #6 = { "amdgpu-flat-work-group-size"="512,512" } attributes #7 = { "amdgpu-flat-work-group-size"="64,256" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -73,6 +73,6 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -31,5 +31,5 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -97,6 +97,6 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -41,6 +41,6 @@ attributes #2 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -41,6 +41,6 @@ attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -61,5 +61,5 @@ attributes #0 = { "uniform-work-group-size"="false" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -307,11 +307,6 @@ } } - // Set amdgpu_hostcall if host calls have been linked, as needed by newer LLVM - // FIXME: Is there a way to set this during printf() lowering that makes sense - if (ret->getFunction("__ockl_hostcall_internal")) - if (!ret->getModuleFlag("amdgpu_hostcall")) - ret->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1); return ret; }