diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -96,6 +96,8 @@
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
 
 Pass *createAMDGPUAnnotateKernelFeaturesPass();
+Pass *createAMDGPUAttributorPass();
+void initializeAMDGPUAttributorPass(PassRegistry &);
 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
 extern char &AMDGPUAnnotateKernelFeaturesID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -0,0 +1,273 @@
+//===- AMDGPUAttributor.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#define DEBUG_TYPE "amdgpu-attributor"
+
+using namespace llvm;
+
+// All implicit-argument attributes this pass reasons about.
+static constexpr StringLiteral ImplicitAttrNames[] = {
+    // X ids unnecessarily propagated to kernels.
+    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
+    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
+    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
+    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
+    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};
+
+// We do not need to note the x workitem or workgroup id because they are always
+// initialized.
+//
+// TODO: We should not add the attributes if the known compile time workgroup
+// size is 1 for y/z.
+//
+// Returns the attribute name implied by intrinsic \p ID, or "" if none.
+// Sets \p NonKernelOnly when the attribute is only added to non-kernels, and
+// \p IsQueuePtr when the intrinsic requires access to the queue pointer.
+static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
+                                     bool &IsQueuePtr) {
+  switch (ID) {
+  case Intrinsic::amdgcn_workitem_id_x:
+    NonKernelOnly = true;
+    return "amdgpu-work-item-id-x";
+  case Intrinsic::amdgcn_workgroup_id_x:
+    NonKernelOnly = true;
+    return "amdgpu-work-group-id-x";
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::r600_read_tidig_y:
+    return "amdgpu-work-item-id-y";
+  case Intrinsic::amdgcn_workitem_id_z:
+  case Intrinsic::r600_read_tidig_z:
+    return "amdgpu-work-item-id-z";
+  case Intrinsic::amdgcn_workgroup_id_y:
+  case Intrinsic::r600_read_tgid_y:
+    return "amdgpu-work-group-id-y";
+  case Intrinsic::amdgcn_workgroup_id_z:
+  case Intrinsic::r600_read_tgid_z:
+    return "amdgpu-work-group-id-z";
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return "amdgpu-dispatch-ptr";
+  case Intrinsic::amdgcn_dispatch_id:
+    return "amdgpu-dispatch-id";
+  case Intrinsic::amdgcn_kernarg_segment_ptr:
+    return "amdgpu-kernarg-segment-ptr";
+  case Intrinsic::amdgcn_implicitarg_ptr:
+    return "amdgpu-implicitarg-ptr";
+  case Intrinsic::amdgcn_queue_ptr:
+  case Intrinsic::amdgcn_is_shared:
+  case Intrinsic::amdgcn_is_private:
+    // TODO: Does not require queue ptr on gfx9+
+  case Intrinsic::trap:
+  case Intrinsic::debugtrap:
+    IsQueuePtr = true;
+    return "amdgpu-queue-ptr";
+  default:
+    return "";
+  }
+}
+
+// Abstract-attribute interface tracking the AMDGPU implicit-argument
+// attribute set of a function. NOTE(review): getName() reports
+// "AAAMDAttributes" while the type is AAAMDInfo — kept byte-identical since
+// it is a runtime string.
+struct AAAMDInfo : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for
+  /// the position \p IRP.
+  static AAAMDInfo &createForPosition(const IRPosition &IRP, Attributor &A);
+
+  /// See AbstractAttribute::getName().
+  const std::string getName() const override { return "AAAMDAttributes"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDAttributes.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Attribute names currently deduced for the associated function.
+  virtual const DenseSet<StringRef> &getAttributes() const = 0;
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+const char AAAMDInfo::ID = 0;
+
+// Function-position implementation of AAAMDInfo.
+struct AAAMDInfoFunction : public AAAMDInfo {
+  AAAMDInfoFunction(const IRPosition &IRP, Attributor &A) : AAAMDInfo(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    CallingConv::ID CC = F->getCallingConv();
+    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+
+    // Ignore functions with graphics calling conventions, these are currently
+    // not allowed to have kernel arguments.
+    if (AMDGPU::isGraphics(F->getCallingConv())) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    // Seed the state with attributes already present on the function.
+    for (StringRef Attr : ImplicitAttrNames) {
+      if (F->hasFnAttribute(Attr))
+        Attributes.insert(Attr);
+    }
+
+    // If this function hasAddressTaken() = true
+    // then add all attributes corresponding to the implicit args.
+    if (CallingConvSupportsAllImplicits &&
+        F->hasAddressTaken(nullptr, true, true, true)) {
+      for (StringRef AttrName : ImplicitAttrNames) {
+        Attributes.insert(AttrName);
+      }
+    }
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+    bool IsFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+    CallingConv::ID CC = F->getCallingConv();
+    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+
+    // Record an attribute, noting whether it is new so the Attributor knows
+    // this state changed.
+    auto AddAttribute = [&](StringRef AttrName) {
+      if (Attributes.insert(AttrName).second)
+        Change = ChangeStatus::CHANGED;
+    };
+
+    // Check for Intrinsics and propagate attributes.
+    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+        *this, this->getIRPosition(), DepClassTy::REQUIRED);
+
+    // We have to assume that we can reach a function with these attributes.
+    if (CallingConvSupportsAllImplicits && AAEdges.hasUnknownCallee()) {
+      for (StringRef AttrName : ImplicitAttrNames) {
+        AddAttribute(AttrName);
+      }
+    }
+
+    for (Function *Callee : AAEdges.getOptimisticEdges()) {
+      Intrinsic::ID IID = Callee->getIntrinsicID();
+      if (IID != Intrinsic::not_intrinsic) {
+        // kernel only
+        if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
+          AddAttribute("amdgpu-kernarg-segment-ptr");
+          continue;
+        }
+
+        bool NonKernelOnly = false, NeedsQueuePtr = false;
+        StringRef AttrName =
+            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
+
+        if (!AttrName.empty() && (IsFunc || !NonKernelOnly))
+          AddAttribute(AttrName);
+        if (NeedsQueuePtr)
+          AddAttribute("amdgpu-queue-ptr");
+
+        continue;
+      }
+
+      // Query the callee's state, not our own: querying IRPosition::function(*F)
+      // here would be a self-query and would never propagate anything.
+      const AAAMDInfo &AAAMD = A.getAAFor<AAAMDInfo>(
+          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
+      // Propagate implicit attributes from called function.
+      for (StringRef AttrName : ImplicitAttrNames)
+        if (CalleeAttributes.count(AttrName))
+          AddAttribute(AttrName);
+    }
+
+    // Check the function body.
+    // Any alloca implies stack objects. NOTE(review): returning false stops
+    // instruction iteration early — one alloca is sufficient evidence.
+    auto CheckAlloca = [&](Instruction &I) {
+      AddAttribute("amdgpu-stack-objects");
+      return false;
+    };
+
+    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca});
+
+    // Casting from local/private address spaces requires apertures read from
+    // the queue pointer.
+    auto CheckAddrSpaceCasts = [&](Instruction &I) {
+      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
+      if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+          SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
+        AddAttribute("amdgpu-queue-ptr");
+      return true;
+    };
+
+    A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
+                              {Instruction::AddrSpaceCast});
+
+    return Change;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    SmallVector<Attribute, 8> AttrList;
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    for (StringRef AttrName : Attributes)
+      AttrList.push_back(Attribute::get(Ctx, AttrName));
+
+    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList);
+  }
+
+  const std::string getAsStr() const override {
+    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
+  }
+
+  const DenseSet<StringRef> &getAttributes() const override {
+    return Attributes;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+
+private:
+  DenseSet<StringRef> Attributes;
+};
+
+AAAMDInfo &AAAMDInfo::createForPosition(const IRPosition &IRP, Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDInfoFunction(IRP, A);
+  llvm_unreachable("AAAMDInfo is only valid for function position");
+}
+
+// Legacy-PM module pass that seeds AAAMDInfo for every function and runs the
+// Attributor to a fixpoint.
+class AMDGPUAttributor : public ModulePass {
+public:
+  AMDGPUAttributor() : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override {
+    SetVector<Function *> Functions;
+    AnalysisGetter AG;
+    for (Function &F : M)
+      Functions.insert(&F);
+
+    CallGraphUpdater CGUpdater;
+    BumpPtrAllocator Allocator;
+    InformationCache InfoCache(M, AG, Allocator, nullptr);
+    Attributor A(Functions, InfoCache, CGUpdater);
+
+    for (Function &F : M)
+      A.getOrCreateAAFor<AAAMDInfo>(IRPosition::function(F));
+
+    ChangeStatus Change = A.run();
+    return Change == ChangeStatus::CHANGED;
+  }
+
+  StringRef getPassName() const override { return "AMDGPU Attributor"; }
+
+  TargetMachine *TM;
+  static char ID;
+};
+
+char AMDGPUAttributor::ID = 0;
+
+Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
+INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -234,6 +234,7 @@
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUFixFunctionBitcastsPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
+  initializeAMDGPUAttributorPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPUArgumentUsageInfoPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -44,6 +44,7 @@
   AMDGPUAliasAnalysis.cpp
   AMDGPUAlwaysInlinePass.cpp
   AMDGPUAnnotateKernelFeatures.cpp
+  AMDGPUAttributor.cpp
   AMDGPUAnnotateUniformValues.cpp
   AMDGPUArgumentUsageInfo.cpp
   AMDGPUAsmPrinter.cpp
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2032,6 +2032,8 @@
     // The alignment of a pointer is interesting for loads.
   case Instruction::Store:
     // The alignment of a pointer is interesting for stores.
+  case Instruction::Alloca:
+  case Instruction::AddrSpaceCast:
     IsInterestingOpcode = true;
   }
   if (IsInterestingOpcode) {
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefix=HSA %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s
 
 declare i32 @llvm.r600.read.tgid.x() #0
 declare i32 @llvm.r600.read.tgid.y() #0
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
 
@@ -47,5 +48,6 @@
   ret void
 }
 
+;FIXME: The AMDGPU Attributor does not deduce the uniform-work-group-size attribute.
 ; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
 ; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }