Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -24,6 +24,8 @@
 class Target;
 class TargetMachine;
 class PassRegistry;
+class Module;
+class Triple;
 
 // R600 Passes
 FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@@ -167,44 +169,54 @@
 /// however on the GPU, each address space points to
 /// a separate piece of memory that is unique from other
 /// memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces : unsigned {
-  PRIVATE_ADDRESS = 0, ///< Address space for private memory.
-  GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
-  CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
-  LOCAL_ADDRESS = 3, ///< Address space for local memory.
-  FLAT_ADDRESS = 4, ///< Address space for flat memory.
-  REGION_ADDRESS = 5, ///< Address space for region memory.
-  PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
-  PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+struct AMDGPUAS {
+  // The following address space values depend on the triple environment.
+  unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
+  unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2)
+  unsigned FLAT_ADDRESS; ///< Address space for flat memory.
+  unsigned REGION_ADDRESS; ///< Address space for region memory.
+
+  // The remaining address space values are the same for every triple.
+  const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
+  const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
+  const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressible parameter memory (CONST0)
+  const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressible parameter memory (VTX1)
 
   // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
   // order to be able to dynamically index a constant buffer, for example:
   //
   // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
 
-  CONSTANT_BUFFER_0 = 8,
-  CONSTANT_BUFFER_1 = 9,
-  CONSTANT_BUFFER_2 = 10,
-  CONSTANT_BUFFER_3 = 11,
-  CONSTANT_BUFFER_4 = 12,
-  CONSTANT_BUFFER_5 = 13,
-  CONSTANT_BUFFER_6 = 14,
-  CONSTANT_BUFFER_7 = 15,
-  CONSTANT_BUFFER_8 = 16,
-  CONSTANT_BUFFER_9 = 17,
-  CONSTANT_BUFFER_10 = 18,
-  CONSTANT_BUFFER_11 = 19,
-  CONSTANT_BUFFER_12 = 20,
-  CONSTANT_BUFFER_13 = 21,
-  CONSTANT_BUFFER_14 = 22,
-  CONSTANT_BUFFER_15 = 23,
+  const static unsigned CONSTANT_BUFFER_0 = 8;
+  const static unsigned CONSTANT_BUFFER_1 = 9;
+  const static unsigned CONSTANT_BUFFER_2 = 10;
+  const static unsigned CONSTANT_BUFFER_3 = 11;
+  const static unsigned CONSTANT_BUFFER_4 = 12;
+  const static unsigned CONSTANT_BUFFER_5 = 13;
+  const static unsigned CONSTANT_BUFFER_6 = 14;
+  const static unsigned CONSTANT_BUFFER_7 = 15;
+  const static unsigned CONSTANT_BUFFER_8 = 16;
+  const static unsigned CONSTANT_BUFFER_9 = 17;
+  const static unsigned CONSTANT_BUFFER_10 = 18;
+  const static unsigned CONSTANT_BUFFER_11 = 19;
+  const static unsigned CONSTANT_BUFFER_12 = 20;
+  const static unsigned CONSTANT_BUFFER_13 = 21;
+  const static unsigned CONSTANT_BUFFER_14 = 22;
+  const static unsigned CONSTANT_BUFFER_15 = 23;
 
   // Some places use this if the address space can't be determined.
-  UNKNOWN_ADDRESS_SPACE = ~0u
+  const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
 };
 
-} // namespace AMDGPUAS
+namespace llvm {
+namespace AMDGPU {
+/// Return the address space numbering for the target triple of \p M / \p TM,
+/// or for \p T itself; the values are chosen from the triple environment.
+AMDGPUAS getAMDGPUAS(const Module &M);
+AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
+AMDGPUAS getAMDGPUAS(Triple T);
+} // namespace AMDGPU
+} // namespace llvm
 
 /// AMDGPU-specific synchronization scopes.
 enum class AMDGPUSynchronizationScope : uint8_t {
Index: lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -13,6 +13,8 @@
 #ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
 #define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
 
+#include "AMDGPU.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
@@ -25,11 +27,17 @@
   friend AAResultBase;
 
   const DataLayout &DL;
+  AMDGPUAS AS; // Address space numbering used by this result.
 
 public:
-  explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
+  /// \p T selects the address space numbering, so AS is never left
+  /// uninitialized. It is defaulted so existing single-argument callers still
+  /// compile; they get whatever getAMDGPUAS() returns for an empty triple.
+  /// NOTE(review): callers that know the module triple should pass it.
+  explicit AMDGPUAAResult(const DataLayout &DL, Triple T = Triple())
+      : AAResultBase(), DL(DL), AS(AMDGPU::getAMDGPUAS(T)) {}
   AMDGPUAAResult(AMDGPUAAResult &&Arg)
-      : AAResultBase(std::move(Arg)), DL(Arg.DL){}
+      : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS) {}
 
   /// Handle invalidation events from the new pass manager.
   ///
Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -28,6 +28,7 @@
 class AMDGPUAnnotateKernelFeatures : public ModulePass {
 private:
   const TargetMachine *TM;
+  AMDGPUAS AS; // Assigned from the module triple in runOnModule().
   static bool hasAddrSpaceCast(const Function &F);
 
   void addAttrToCallers(Function *Intrin, StringRef AttrName);
@@ -65,18 +66,19 @@
 
 
 // The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
-  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
+  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
 }
 
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
-  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
+                                 const AMDGPUAS &AS) {
+  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
 }
 
 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
-    return castRequiresQueuePtr(SrcAS);
+    return castRequiresQueuePtr(SrcAS, AS);
   }
 
   return false;
@@ -173,6 +175,7 @@
 
 bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
   Triple TT(M.getTargetTriple());
+  AS = AMDGPU::getAMDGPUAS(M);
 
   static const StringRef IntrinsicToAttr[][2] = {
     // .x omitted
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -67,10 +67,11 @@
   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
   // make the right decision when generating code for different targets.
   const AMDGPUSubtarget *Subtarget;
+  AMDGPUAS AS; // Address space numbering for the target machine's triple.
 
 public:
   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(TM, OptLevel) {}
+      : SelectionDAGISel(TM, OptLevel), AS(AMDGPU::getAMDGPUAS(TM)) {}
   ~AMDGPUDAGToDAGISel() override = default;
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -586,7 +587,7 @@
   if (!N->readMem())
     return false;
   if (CbId == -1)
-    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+    return N->getAddressSpace() == AS.CONSTANT_ADDRESS;
 
   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
 }
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
@@ -208,20 +208,20 @@
   }
 }
 
-static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
-    AMDGPUAS::AddressSpaces A) {
-  switch (A) {
-  case AMDGPUAS::GLOBAL_ADDRESS:
+/// Map address space \p A, numbered according to \p AS, to the runtime
+/// metadata qualifier. Any address space not matched below maps to Private.
+static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(unsigned A,
+                                                           AMDGPUAS AS) {
+  if (A == AS.GLOBAL_ADDRESS)
     return KernelArg::Global;
-  case AMDGPUAS::CONSTANT_ADDRESS:
+  else if (A == AS.CONSTANT_ADDRESS)
     return KernelArg::Constant;
-  case AMDGPUAS::LOCAL_ADDRESS:
+  else if (A == AS.LOCAL_ADDRESS)
     return KernelArg::Local;
-  case AMDGPUAS::FLAT_ADDRESS:
+  else if (A == AS.FLAT_ADDRESS)
     return KernelArg::Generic;
-  case AMDGPUAS::REGION_ADDRESS:
+  else if (A == AS.REGION_ADDRESS)
     return KernelArg::Region;
-  default:
+  else
     return KernelArg::Private;
-  }
 }
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -737,5 +738,35 @@
          isUInt<20>(EncodedOffset);
 }
 
+/// Return the address space numbering selected by the environment of \p T.
+AMDGPUAS getAMDGPUAS(Triple T) {
+  auto Env = T.getEnvironment();
+  AMDGPUAS AS;
+  if (Env == Triple::AMDGIZ || Env == Triple::AMDGIZCL) {
+    // amdgiz environments: flat is address space 0.
+    AS.FLAT_ADDRESS = 0;
+    AS.CONSTANT_ADDRESS = 4;
+    AS.PRIVATE_ADDRESS = 5;
+    AS.REGION_ADDRESS = 2;
+  } else {
+    // Every other environment keeps the values of the former enum.
+    AS.FLAT_ADDRESS = 4;
+    AS.CONSTANT_ADDRESS = 2;
+    AS.PRIVATE_ADDRESS = 0;
+    AS.REGION_ADDRESS = 5;
+  }
+  return AS;
+}
+
+/// Overload that takes the triple from the target machine \p M.
+AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
+  return getAMDGPUAS(M.getTargetTriple());
+}
+
+/// Overload that takes the triple from the module \p M.
+AMDGPUAS getAMDGPUAS(const Module &M) {
+  return getAMDGPUAS(Triple(M.getTargetTriple()));
+}
+
 } // end namespace AMDGPU
 } // end namespace llvm