diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -0,0 +1,32 @@
+//===--- AMDGPUAttributes.def ---------------------------------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the various function attributes
+// that indicate *absence* of the corresponding implicit kernel
+// arguments.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef AMDGPU_ATTRIBUTE
+#error "AMDGPU_ATTRIBUTE must be defined before including this file"
+#endif
+
+AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
+AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
+AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
+AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
+AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")
+AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y")
+AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
+AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x")
+AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y")
+AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
+
+#undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -22,37 +22,28 @@
 
 using namespace llvm;
 
+// Bit index of each implicit argument; LAST_ARG_POS is the argument count.
+#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
+
+enum ImplicitArgumentPositions {
+  #include "AMDGPUAttributes.def"
+  LAST_ARG_POS
+};
+
+// One mask bit per implicit argument, derived from the positions above.
+#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
+
 enum ImplicitArgumentMask {
   NOT_IMPLICIT_INPUT = 0,
-
-  // SGPRs
-  DISPATCH_PTR = 1 << 0,
-  QUEUE_PTR = 1 << 1,
-  DISPATCH_ID = 1 << 2,
-  IMPLICIT_ARG_PTR = 1 << 3,
-  WORKGROUP_ID_X = 1 << 4,
-  WORKGROUP_ID_Y = 1 << 5,
-  WORKGROUP_ID_Z = 1 << 6,
-
-  // VGPRS:
-  WORKITEM_ID_X = 1 << 7,
-  WORKITEM_ID_Y = 1 << 8,
-  WORKITEM_ID_Z = 1 << 9,
-  ALL_ARGUMENT_MASK = (1 << 10) - 1
+  #include "AMDGPUAttributes.def"
+  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
 };
 
+// Pairs each mask bit with the attribute string marking that input as absent.
+#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
 static constexpr std::pair<ImplicitArgumentMask,
                            StringLiteral> ImplicitAttrs[] = {
-  {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
-  {QUEUE_PTR, "amdgpu-no-queue-ptr"},
-  {DISPATCH_ID, "amdgpu-no-dispatch-id"},
-  {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
-  {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
-  {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
-  {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
-  {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
-  {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
-  {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
+  #include "AMDGPUAttributes.def"
 };
 
 // We do not need to note the x workitem or workgroup id because they are always
@@ -90,7 +81,7 @@
   case Intrinsic::amdgcn_queue_ptr:
   case Intrinsic::amdgcn_is_shared:
   case Intrinsic::amdgcn_is_private:
-    // TODO: Does not require queue ptr on gfx9+
+    // TODO: Does not require the queue pointer on gfx9+
   case Intrinsic::trap:
   case Intrinsic::debugtrap:
     IsQueuePtr = true;
@@ -152,7 +143,7 @@
   }
 
 private:
-  /// Check if the ConstantExpr \p CE requires queue ptr attribute.
+  /// Check if the ConstantExpr \p CE requires the queue pointer.
   static bool visitConstExpr(const ConstantExpr *CE) {
     if (CE->getOpcode() == Instruction::AddrSpaceCast) {
       unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
@@ -186,7 +177,7 @@
   }
 
 public:
-  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
+  /// Returns true if \p Fn needs the queue pointer because of \p C.
   bool needsQueuePtr(const Constant *C, Function &Fn) {
     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
     bool HasAperture = hasApertureRegs(Fn);
@@ -205,7 +196,7 @@
   }
 
 private:
-  /// Used to determine if the Constant needs a queue ptr attribute.
+  /// Used to determine if the Constant needs the queue pointer.
   DenseMap<const Constant *, uint8_t> ConstantStatus;
 };
 
@@ -388,7 +379,6 @@
       return indicatePessimisticFixpoint();
 
     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
-    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 
     bool NeedsQueuePtr = false;
 
@@ -410,13 +400,56 @@
       }
     }
 
-    // If we found that we need amdgpu-queue-ptr, nothing else to do.
+    // Skip the scan when the queue pointer is already known to be needed.
+    NeedsQueuePtr = NeedsQueuePtr || checkForQueuePtr(A);
     if (NeedsQueuePtr) {
       removeAssumedBits(QUEUE_PTR);
-      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
-                                           ChangeStatus::UNCHANGED;
     }
 
+    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
+                                       : ChangeStatus::UNCHANGED;
+  }
+
+  /// Manifest the attribute string of every known bit at this IR position.
+  ChangeStatus manifest(Attributor &A) override {
+    SmallVector<Attribute, 8> AttrList;
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    for (const auto &Attr : ImplicitAttrs) {
+      if (isKnown(Attr.first))
+        AttrList.push_back(Attribute::get(Ctx, Attr.second));
+    }
+
+    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+                                              /* ForceReplace */ true);
+  }
+
+  /// Returns "AMDInfo[ ... ]" listing the attributes whose bits are assumed.
+  const std::string getAsStr() const override {
+    std::string Str;
+    raw_string_ostream OS(Str);
+    OS << "AMDInfo[";
+    for (const auto &Attr : ImplicitAttrs)
+      if (isAssumed(Attr.first))
+        OS << ' ' << Attr.second;
+    OS << " ]";
+    return OS.str();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+
+private:
+  /// Returns true if an address space cast instruction or a constant operand
+  /// of the associated function requires the queue pointer.
+  bool checkForQueuePtr(Attributor &A) {
+    Function *F = getAssociatedFunction();
+    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+
+    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+    bool NeedsQueuePtr = false;
+
     auto CheckAddrSpaceCasts = [&](Instruction &I) {
       unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
       if (castRequiresQueuePtr(SrcAS)) {
@@ -431,7 +464,7 @@
     // `checkForAllInstructions` is much more cheaper than going through all
     // instructions, try it first.
 
-    // amdgpu-queue-ptr is not needed if aperture regs is present.
+    // The queue pointer is not needed if aperture regs are present.
     if (!HasApertureRegs) {
       bool UsedAssumedInformation = false;
       A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
@@ -439,61 +472,26 @@
                                 UsedAssumedInformation);
     }
 
-    // If we found that we need amdgpu-queue-ptr, nothing else to do.
-    if (NeedsQueuePtr) {
-      removeAssumedBits(QUEUE_PTR);
-      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
-                                           ChangeStatus::UNCHANGED;
-    }
+    // If we found that we need the queue pointer, nothing else to do.
+    if (NeedsQueuePtr)
+      return true;
 
-    if (!IsNonEntryFunc && HasApertureRegs) {
-      return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
-                                           ChangeStatus::UNCHANGED;
-    }
+    if (!IsNonEntryFunc && HasApertureRegs)
+      return false;
 
     for (BasicBlock &BB : *F) {
       for (Instruction &I : BB) {
         for (const Use &U : I.operands()) {
           if (const auto *C = dyn_cast<Constant>(U)) {
-            if (InfoCache.needsQueuePtr(C, *F)) {
-              removeAssumedBits(QUEUE_PTR);
-              return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
-                                                   ChangeStatus::UNCHANGED;
-            }
+            if (InfoCache.needsQueuePtr(C, *F))
+              return true;
           }
         }
       }
     }
 
-    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
-                                         ChangeStatus::UNCHANGED;
-  }
-
-  ChangeStatus manifest(Attributor &A) override {
-    SmallVector<Attribute, 8> AttrList;
-    LLVMContext &Ctx = getAssociatedFunction()->getContext();
-
-    for (auto Attr : ImplicitAttrs) {
-      if (isKnown(Attr.first))
-        AttrList.push_back(Attribute::get(Ctx, Attr.second));
-    }
-
-    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
-                                              /* ForceReplace */ true);
-  }
-
-  const std::string getAsStr() const override {
-    std::string Str;
-    raw_string_ostream OS(Str);
-    OS << "AMDInfo[";
-    for (auto Attr : ImplicitAttrs)
-      OS << ' ' << Attr.second;
-    OS << " ]";
-    return OS.str();
+    return false;
   }
-
-  /// See AbstractAttribute::trackStatistics()
-  void trackStatistics() const override {}
 };
 
 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,