diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUAttributor.h" #include "GCNSubtarget.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -23,15 +24,9 @@ #define DEBUG_TYPE "amdgpu-annotate-kernel-features" using namespace llvm; +using namespace AMDGPU; namespace { -static constexpr StringLiteral ImplicitAttrNames[] = { - // X ids unnecessarily propagated to kernels. - "amdgpu-work-item-id-x", "amdgpu-work-item-id-y", - "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", - "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", - "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", - "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"}; class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: @@ -77,22 +72,10 @@ // The queue ptr is only needed when casting to flat, not from it. -static bool castRequiresQueuePtr(unsigned SrcAS) { - return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; -} - static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { return castRequiresQueuePtr(ASC->getSrcAddressSpace()); } -static bool isDSAddress(const Constant *C) { - const GlobalValue *GV = dyn_cast(C); - if (!GV) - return false; - unsigned AS = GV->getAddressSpace(); - return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; -} - bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); @@ -142,54 +125,6 @@ return false; } -// We do not need to note the x workitem or workgroup id because they are always -// initialized. -// -// TODO: We should not add the attributes if the known compile time workgroup -// size is 1 for y/z. -static StringRef intrinsicToAttrName(Intrinsic::ID ID, - bool &NonKernelOnly, - bool &IsQueuePtr) { - switch (ID) { - case Intrinsic::amdgcn_workitem_id_x: - NonKernelOnly = true; - return "amdgpu-work-item-id-x"; - case Intrinsic::amdgcn_workgroup_id_x: - NonKernelOnly = true; - return "amdgpu-work-group-id-x"; - case Intrinsic::amdgcn_workitem_id_y: - case Intrinsic::r600_read_tidig_y: - return "amdgpu-work-item-id-y"; - case Intrinsic::amdgcn_workitem_id_z: - case Intrinsic::r600_read_tidig_z: - return "amdgpu-work-item-id-z"; - case Intrinsic::amdgcn_workgroup_id_y: - case Intrinsic::r600_read_tgid_y: - return "amdgpu-work-group-id-y"; - case Intrinsic::amdgcn_workgroup_id_z: - case Intrinsic::r600_read_tgid_z: - return "amdgpu-work-group-id-z"; - case Intrinsic::amdgcn_dispatch_ptr: - return "amdgpu-dispatch-ptr"; - case Intrinsic::amdgcn_dispatch_id: - return "amdgpu-dispatch-id"; - case Intrinsic::amdgcn_kernarg_segment_ptr: - return "amdgpu-kernarg-segment-ptr"; - case Intrinsic::amdgcn_implicitarg_ptr: - return "amdgpu-implicitarg-ptr"; - case Intrinsic::amdgcn_queue_ptr: - case Intrinsic::amdgcn_is_shared: - case Intrinsic::amdgcn_is_private: - // TODO: Does not require queue ptr on gfx9+ - case Intrinsic::trap: - case Intrinsic::debugtrap: - IsQueuePtr = true; - return "amdgpu-queue-ptr"; - default: - return ""; - } -} - static bool handleAttr(Function &Parent, const Function &Callee, StringRef Name) { if (Callee.hasFnAttribute(Name)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.h b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.h @@ -0,0 +1,88 @@ +//===- AMDGPUAttributor.h - Attributor's util functions and tables -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUATTRIBUTOR_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUATTRIBUTOR_H + +#include "AMDGPU.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsR600.h" + +namespace llvm { + +class StringLiteral; + +namespace AMDGPU { + +extern const StringLiteral ImplicitAttrNames[10]; + +// We do not need to note the x workitem or workgroup id because they are always +// initialized. +// +// TODO: We should not add the attributes if the known compile time workgroup +// size is 1 for y/z. +inline StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, + bool &IsQueuePtr) { + switch (ID) { + case Intrinsic::amdgcn_workitem_id_x: + NonKernelOnly = true; + return "amdgpu-work-item-id-x"; + case Intrinsic::amdgcn_workgroup_id_x: + NonKernelOnly = true; + return "amdgpu-work-group-id-x"; + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::r600_read_tidig_y: + return "amdgpu-work-item-id-y"; + case Intrinsic::amdgcn_workitem_id_z: + case Intrinsic::r600_read_tidig_z: + return "amdgpu-work-item-id-z"; + case Intrinsic::amdgcn_workgroup_id_y: + case Intrinsic::r600_read_tgid_y: + return "amdgpu-work-group-id-y"; + case Intrinsic::amdgcn_workgroup_id_z: + case Intrinsic::r600_read_tgid_z: + return "amdgpu-work-group-id-z"; + case Intrinsic::amdgcn_dispatch_ptr: + return "amdgpu-dispatch-ptr"; + case Intrinsic::amdgcn_dispatch_id: + return "amdgpu-dispatch-id"; + case Intrinsic::amdgcn_kernarg_segment_ptr: + return "amdgpu-kernarg-segment-ptr"; + case Intrinsic::amdgcn_implicitarg_ptr: + return "amdgpu-implicitarg-ptr"; + case Intrinsic::amdgcn_queue_ptr: + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: + // TODO: Does not require queue ptr on gfx9+ + case Intrinsic::trap: + case Intrinsic::debugtrap: + IsQueuePtr = true; + return "amdgpu-queue-ptr"; + default: + return ""; + } +} + +inline bool castRequiresQueuePtr(unsigned SrcAS) { + return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; +} + +inline bool isDSAddress(const Constant *C) { + const GlobalValue *GV = dyn_cast(C); + if (!GV) + return false; + unsigned AS = GV->getAddressSpace(); + return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; +} + +} // namespace AMDGPU +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUATTRIBUTOR_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUAttributor.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -20,9 +21,10 @@ #define DEBUG_TYPE "amdgpu-attributor" -using namespace llvm; +namespace llvm { +namespace AMDGPU { -static constexpr StringLiteral ImplicitAttrNames[] = { +const StringLiteral ImplicitAttrNames[10] = { // X ids unnecessarily propagated to kernels. "amdgpu-work-item-id-x", "amdgpu-work-item-id-y", "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", @@ -30,64 +32,13 @@ "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"}; -// We do not need to note the x workitem or workgroup id because they are always -// initialized. -// -// TODO: We should not add the attributes if the known compile time workgroup -// size is 1 for y/z. -static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly, - bool &IsQueuePtr) { - switch (ID) { - case Intrinsic::amdgcn_workitem_id_x: - NonKernelOnly = true; - return "amdgpu-work-item-id-x"; - case Intrinsic::amdgcn_workgroup_id_x: - NonKernelOnly = true; - return "amdgpu-work-group-id-x"; - case Intrinsic::amdgcn_workitem_id_y: - case Intrinsic::r600_read_tidig_y: - return "amdgpu-work-item-id-y"; - case Intrinsic::amdgcn_workitem_id_z: - case Intrinsic::r600_read_tidig_z: - return "amdgpu-work-item-id-z"; - case Intrinsic::amdgcn_workgroup_id_y: - case Intrinsic::r600_read_tgid_y: - return "amdgpu-work-group-id-y"; - case Intrinsic::amdgcn_workgroup_id_z: - case Intrinsic::r600_read_tgid_z: - return "amdgpu-work-group-id-z"; - case Intrinsic::amdgcn_dispatch_ptr: - return "amdgpu-dispatch-ptr"; - case Intrinsic::amdgcn_dispatch_id: - return "amdgpu-dispatch-id"; - case Intrinsic::amdgcn_kernarg_segment_ptr: - return "amdgpu-kernarg-segment-ptr"; - case Intrinsic::amdgcn_implicitarg_ptr: - return "amdgpu-implicitarg-ptr"; - case Intrinsic::amdgcn_queue_ptr: - case Intrinsic::amdgcn_is_shared: - case Intrinsic::amdgcn_is_private: - // TODO: Does not require queue ptr on gfx9+ - case Intrinsic::trap: - case Intrinsic::debugtrap: - IsQueuePtr = true; - return "amdgpu-queue-ptr"; - default: - return ""; - } -} +} // namespace AMDGPU +} // namespace llvm -static bool castRequiresQueuePtr(unsigned SrcAS) { - return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; -} +using namespace llvm; +using namespace AMDGPU; -static bool isDSAddress(const Constant *C) { - const GlobalValue *GV = dyn_cast(C); - if (!GV) - return false; - unsigned AS = GV->getAddressSpace(); - return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; -} +namespace { class AMDGPUInformationCache : public InformationCache { public: @@ -188,7 +139,6 @@ /// Unique ID (due to the unique address) static const char ID; }; -const char AAAMDAttributes::ID = 0; struct AAAMDWorkGroupSize : public StateWrapper { @@ -214,7 +164,6 @@ /// Unique ID (due to the unique address) static const char ID; }; -const char AAAMDWorkGroupSize::ID = 0; struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize { AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) @@ -286,13 +235,6 @@ void trackStatistics() const override {} }; -AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP, - Attributor &A) { - if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) - return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A); - llvm_unreachable("AAAMDWorkGroupSize is only valid for function position"); -} - struct AAAMDAttributesFunction : public AAAMDAttributes { AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) : AAAMDAttributes(IRP, A) {} @@ -469,13 +411,6 @@ DenseSet Attributes; }; -AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, - Attributor &A) { - if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) - return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); - llvm_unreachable("AAAMDAttributes is only valid for function position"); -} - class AMDGPUAttributor : public ModulePass { public: AMDGPUAttributor() : ModulePass(ID) {} @@ -519,8 +454,25 @@ TargetMachine *TM; static char ID; }; +} // end anonymous namespace +const char AAAMDAttributes::ID = 0; +const char AAAMDWorkGroupSize::ID = 0; char AMDGPUAttributor::ID = 0; +AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP, + Attributor &A) { + if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) + return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A); + llvm_unreachable("AAAMDWorkGroupSize is only valid for function position"); +} + +AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, + Attributor &A) { + if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) + return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); + llvm_unreachable("AAAMDAttributes is only valid for function position"); +} + Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); } INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)