Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h @@ -23,6 +23,7 @@ class Target; class TargetMachine; class PassRegistry; +class Module; // R600 Passes FunctionPass *createR600VectorRegMerger(TargetMachine &tm); @@ -150,43 +151,53 @@ /// however on the GPU, each address space points to /// a separate piece of memory that is unique from other /// memory locations. -namespace AMDGPUAS { -enum AddressSpaces : unsigned { - PRIVATE_ADDRESS = 0, ///< Address space for private memory. - GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2) - LOCAL_ADDRESS = 3, ///< Address space for local memory. - FLAT_ADDRESS = 4, ///< Address space for flat memory. - REGION_ADDRESS = 5, ///< Address space for region memory. - PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) - PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) +struct AMDGPUAS { + // The following address space values depend on the triple environment. + unsigned PRIVATE_ADDRESS; ///< Address space for private memory. + unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2) + unsigned FLAT_ADDRESS; ///< Address space for flat memory. + unsigned REGION_ADDRESS; ///< Address space for region memory. + + // The maximum value for flat, generic, local, private, constant and region. + const static unsigned MAX_COMMON_ADDRESS = 5; + + const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0). + const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory. + const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressible parameter memory (CONST0) + const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressible parameter memory (VTX1) // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this // order to be able to dynamically index a constant buffer, for example: // // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx - CONSTANT_BUFFER_0 = 8, - CONSTANT_BUFFER_1 = 9, - CONSTANT_BUFFER_2 = 10, - CONSTANT_BUFFER_3 = 11, - CONSTANT_BUFFER_4 = 12, - CONSTANT_BUFFER_5 = 13, - CONSTANT_BUFFER_6 = 14, - CONSTANT_BUFFER_7 = 15, - CONSTANT_BUFFER_8 = 16, - CONSTANT_BUFFER_9 = 17, - CONSTANT_BUFFER_10 = 18, - CONSTANT_BUFFER_11 = 19, - CONSTANT_BUFFER_12 = 20, - CONSTANT_BUFFER_13 = 21, - CONSTANT_BUFFER_14 = 22, - CONSTANT_BUFFER_15 = 23, + const static unsigned CONSTANT_BUFFER_0 = 8; + const static unsigned CONSTANT_BUFFER_1 = 9; + const static unsigned CONSTANT_BUFFER_2 = 10; + const static unsigned CONSTANT_BUFFER_3 = 11; + const static unsigned CONSTANT_BUFFER_4 = 12; + const static unsigned CONSTANT_BUFFER_5 = 13; + const static unsigned CONSTANT_BUFFER_6 = 14; + const static unsigned CONSTANT_BUFFER_7 = 15; + const static unsigned CONSTANT_BUFFER_8 = 16; + const static unsigned CONSTANT_BUFFER_9 = 17; + const static unsigned CONSTANT_BUFFER_10 = 18; + const static unsigned CONSTANT_BUFFER_11 = 19; + const static unsigned CONSTANT_BUFFER_12 = 20; + const static unsigned CONSTANT_BUFFER_13 = 21; + const static unsigned CONSTANT_BUFFER_14 = 22; + const static unsigned CONSTANT_BUFFER_15 = 23; // Some places use this if the address space can't be determined. - UNKNOWN_ADDRESS_SPACE = ~0u + const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u; }; -} // namespace AMDGPUAS +namespace llvm { +namespace AMDGPU { +AMDGPUAS getAMDGPUAS(const Module &M); +AMDGPUAS getAMDGPUAS(const TargetMachine &TM); +AMDGPUAS getAMDGPUAS(Triple T); +} // namespace AMDGPU +} // namespace llvm #endif Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -13,6 +13,7 @@ #ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H #define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -25,11 +26,14 @@ friend AAResultBase; const DataLayout &DL; + AMDGPUAS AS; public: - explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} + explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(), + DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {} AMDGPUAAResult(AMDGPUAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL){} + : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS), + ASAliasRules(Arg.ASAliasRules){} /// Handle invalidation events from the new pass manager. /// @@ -42,6 +46,15 @@ private: bool Aliases(const MDNode *A, const MDNode *B) const; bool PathAliases(const MDNode *A, const MDNode *B) const; + + class ASAliasRulesTy { + public: + ASAliasRulesTy(AMDGPUAS AS_); + AliasResult getAliasResult(unsigned AS1, unsigned AS2) const; + private: + AMDGPUAS AS; + const AliasResult (*ASAliasRules)[6][6]; + } ASAliasRules; }; /// Analysis pass providing a never-invalidated alias analysis result. @@ -53,7 +66,8 @@ typedef AMDGPUAAResult Result; AMDGPUAAResult run(Function &F, AnalysisManager &AM) { - return AMDGPUAAResult(F.getParent()->getDataLayout()); + return AMDGPUAAResult(F.getParent()->getDataLayout(), + Triple(F.getParent()->getTargetTriple())); } }; @@ -72,7 +86,8 @@ const AMDGPUAAResult &getResult() const { return *Result; } bool doInitialization(Module &M) override { - Result.reset(new AMDGPUAAResult(M.getDataLayout())); + Result.reset(new AMDGPUAAResult(M.getDataLayout(), + Triple(M.getTargetTriple()))); return false; } bool doFinalization(Module &M) override { Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -37,26 +37,60 @@ AU.setPreservesAll(); } +// Must match the table in getAliasResult. +AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) { + // These arrarys are indexed by address space value + // enum elements 0 ... to 5 + static const AliasResult ASAliasRulesPrivIsZero[6][6] = { + /* Private Global Constant Group Flat Region*/ + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, + /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias}, + /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias}, + /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias}, + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias} + }; + static const AliasResult ASAliasRulesGenIsZero[6][6] = { + /* Flat Global Region Group Constant Private */ + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias}, + /* Region */ {NoAlias , NoAlias , MayAlias, NoAlias, NoAlias , MayAlias}, + /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias}, + /* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias} + }; + assert(AS.MAX_COMMON_ADDRESS <= 5); + if (AS.FLAT_ADDRESS == 0) { + assert(AS.GLOBAL_ADDRESS == 1 && + AS.REGION_ADDRESS == 2 && + AS.LOCAL_ADDRESS == 3 && + AS.CONSTANT_ADDRESS == 4 && + AS.PRIVATE_ADDRESS == 5); + ASAliasRules = &ASAliasRulesGenIsZero; + } else { + assert(AS.PRIVATE_ADDRESS == 0 && + AS.GLOBAL_ADDRESS == 1 && + AS.CONSTANT_ADDRESS == 2 && + AS.LOCAL_ADDRESS == 3 && + AS.FLAT_ADDRESS == 4 && + AS.REGION_ADDRESS == 5); + ASAliasRules = &ASAliasRulesPrivIsZero; + } +} + +AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1, + unsigned AS2) const { + if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) + report_fatal_error("Pointer address space out of range"); + return (*ASAliasRules)[AS1][AS2]; +} + AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - // This array is indexed by the AMDGPUAS::AddressSpaces - // enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS - // see "llvm/Transforms/AMDSPIRUtils.h" - static const AliasResult ASAliasRules[5][5] = { - /* Private Global Constant Group Flat */ - /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias}, - /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias}, - /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias}, - /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}, - /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias} - }; unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace(); unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace(); - if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS || - asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS) - report_fatal_error("Pointer address space out of range"); - AliasResult Result = ASAliasRules[asA][asB]; + AliasResult Result = ASAliasRules.getAliasResult(asA, asB); if (Result == NoAlias) return Result; if (isa(LocA.Ptr) && isa(LocB.Ptr)) { @@ -75,8 +109,7 @@ bool OrLocal) { const Value *Base = GetUnderlyingObject(Loc.Ptr, DL); - if (Base->getType()->getPointerAddressSpace() == - AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) { + if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) { return true; } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -28,7 +28,8 @@ class AMDGPUAnnotateKernelFeatures : public ModulePass { private: const TargetMachine *TM; - static bool hasAddrSpaceCast(const Function &F); + AMDGPUAS AS; + static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS); void addAttrToCallers(Function *Intrin, StringRef AttrName); bool addAttrsForIntrinsics(Module &M, ArrayRef); @@ -48,10 +49,11 @@ ModulePass::getAnalysisUsage(AU); } - static bool visitConstantExpr(const ConstantExpr *CE); + static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); static bool visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited); + SmallPtrSet &ConstantExprVisited, + AMDGPUAS AS); }; } @@ -65,18 +67,20 @@ // The queue ptr is only needed when casting to flat, not from it. -static bool castRequiresQueuePtr(unsigned SrcAS) { - return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; +static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { + return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; } -static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { - return castRequiresQueuePtr(ASC->getSrcAddressSpace()); +static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, + const AMDGPUAS &AS) { + return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); } -bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { +bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, + AMDGPUAS AS) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); - return castRequiresQueuePtr(SrcAS); + return castRequiresQueuePtr(SrcAS, AS); } return false; @@ -84,7 +88,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited) { + SmallPtrSet &ConstantExprVisited, + AMDGPUAS AS) { if (!ConstantExprVisited.insert(EntryC).second) return false; @@ -97,7 +102,7 @@ // Check this constant expression. if (const auto *CE = dyn_cast(C)) { - if (visitConstantExpr(CE)) + if (visitConstantExpr(CE, AS)) return true; } @@ -118,13 +123,14 @@ } // Return true if an addrspacecast is used that requires the queue ptr. -bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { +bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, + AMDGPUAS AS) { SmallPtrSet ConstantExprVisited; for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { if (const AddrSpaceCastInst *ASC = dyn_cast(&I)) { - if (castRequiresQueuePtr(ASC)) + if (castRequiresQueuePtr(ASC, AS)) return true; } @@ -133,7 +139,7 @@ if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) return true; } } @@ -173,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { Triple TT(M.getTargetTriple()); + AS = AMDGPU::getAMDGPUAS(M); static const StringRef IntrinsicToAttr[][2] = { // .x omitted @@ -216,7 +223,7 @@ bool HasApertureRegs = TM && TM->getSubtarget(F).hasApertureRegs(); - if (!HasApertureRegs && hasAddrSpaceCast(F)) + if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) F.addFnAttr("amdgpu-queue-ptr"); } } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -37,6 +37,7 @@ LoopInfo *LI; DenseMap noClobberClones; bool isKernelFunc; + AMDGPUAS AMDGPUASI; public: static char ID; @@ -130,8 +131,8 @@ Value *Ptr = I.getPointerOperand(); if (!DA->isUniform(Ptr)) return; - auto isGlobalLoad = [](LoadInst &Load)->bool { - return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + auto isGlobalLoad = [&](LoadInst &Load)->bool { + return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }; // We're tracking up to the Function boundaries // We cannot go beyond because of FunctionPass restrictions @@ -166,6 +167,7 @@ } bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { + AMDGPUASI = AMDGPU::getAMDGPUAS(M); return false; } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #include "AMDKernelCodeT.h" +#include "AMDGPU.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include @@ -150,6 +151,7 @@ protected: std::vector DisasmLines, HexLines; size_t DisasmLineMaxLen; + AMDGPUAS AMDGPUASI; }; } // end namespace llvm Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,6 +17,7 @@ // #include "AMDGPUAsmPrinter.h" +#include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "Utils/AMDGPUBaseInfo.h" @@ -92,7 +93,9 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)) {} + : AsmPrinter(TM, std::move(Streamer)) { + AMDGPUASI = static_cast(&TM)->getAMDGPUAS(); + } StringRef AMDGPUAsmPrinter::getPassName() const { return "AMDGPU Assembly Printer"; @@ -174,7 +177,7 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Group segment variables aren't emitted in HSA. - if (AMDGPU::isGroupSegment(GV)) + if (AMDGPU::isGroupSegment(GV, AMDGPUASI)) return; AsmPrinter::EmitGlobalVariable(GV); Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H +#include "AMDGPU.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" namespace llvm { @@ -22,6 +23,7 @@ class AMDGPUTargetLowering; class AMDGPUCallLowering: public CallLowering { + AMDGPUAS AMDGPUASI; unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, unsigned Offset) const; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -31,7 +31,7 @@ #endif AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) - : CallLowering(&TLI) { + : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) { } bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -49,7 +49,7 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = *MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); LLT PtrType = getLLTForType(*PtrTy, DL); unsigned DstReg = MRI.createGenericVirtualRegister(PtrType); unsigned KernArgSegmentPtr = @@ -70,7 +70,7 @@ MachineFunction &MF = MIRBuilder.getMF(); const Function &F = *MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned TypeSize = DL.getTypeStoreSize(ParamTy); unsigned Align = DL.getABITypeAlignment(ParamTy); Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -67,10 +67,13 @@ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; public: explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} + : SelectionDAGISel(TM, OptLevel){ + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); + } ~AMDGPUDAGToDAGISel() override = default; bool runOnMachineFunction(MachineFunction &MF) override; @@ -269,7 +272,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - cast(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + cast(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) return N; const SITargetLowering& Lowering = @@ -586,9 +589,9 @@ if (!N->readMem()) return false; if (CbId == -1) - return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; - return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; } bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { @@ -1536,7 +1539,7 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { MemSDNode *Mem = cast(N); unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUAS::FLAT_ADDRESS) { + if (AS == AMDGPUASI.FLAT_ADDRESS) { SelectCode(N); return; } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H +#include "AMDGPU.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -34,6 +35,7 @@ protected: const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; @@ -224,6 +226,10 @@ /// type of implicit parameter. uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const; + + AMDGPUAS getAMDGPUAS() const { + return AMDGPUASI; + } }; namespace AMDGPUISD { Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -59,6 +59,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); // Lower floating point store/load to integer store/load to reduce the number // of patterns in tablegen. setOperationAction(ISD::LOAD, MVT::f32, Promote); @@ -967,19 +968,16 @@ GlobalAddressSDNode *G = cast(Op); const GlobalValue *GV = G->getGlobal(); - switch (G->getAddressSpace()) { - case AMDGPUAS::LOCAL_ADDRESS: { + if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); // TODO: We could emit code to handle the initialization somewhere. - if (hasDefinedInitializer(GV)) - break; - - unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); - return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); - } + if (!hasDefinedInitializer(GV)) { + unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); + return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); + } } const Function &Fn = *DAG.getMachineFunction().getFunction(); Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H +#include "AMDGPU.h" #include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -35,6 +36,8 @@ const AMDGPUSubtarget &ST; virtual void anchor(); +protected: + AMDGPUAS AMDGPUASI; public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -30,7 +30,7 @@ void AMDGPUInstrInfo::anchor() {} AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) - : AMDGPUGenInstrInfo(-1, -1), ST(ST) {} + : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {} // FIXME: This behaves strangely. If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H +#include "AMDGPU.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -35,7 +36,6 @@ const AMDGPURegisterBankInfo &RBI); bool select(MachineInstr &I) const override; - private: struct GEPInfo { const MachineInstr &GEP; @@ -59,6 +59,8 @@ const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; +protected: + AMDGPUAS AMDGPUASI; }; } // End llvm namespace. Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -33,7 +33,7 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector( const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI) : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} + TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {} MachineOperand AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, @@ -291,7 +291,7 @@ if (!I.hasOneMemOperand()) return false; - if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS) return false; if (!isInstrUniform(I)) Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -204,7 +204,7 @@ //===----------------------------------------------------------------------===// class PrivateMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS; }]>; class PrivateLoad : PrivateMemOp < @@ -222,7 +222,7 @@ def store_private : PrivateStore ; class GlobalMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }]>; // Global address space loads @@ -242,7 +242,7 @@ class ConstantMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; }]>; // Constant address space loads @@ -253,7 +253,7 @@ def constant_load : ConstantLoad; class LocalMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; // Local address space loads @@ -266,7 +266,7 @@ >; class FlatMemOp : PatFrag (N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS; + return cast(N)->getAddressSPace() == AMDGPUASI.FLAT_ADDRESS; }]>; class FlatLoad : FlatMemOp < @@ -348,7 +348,7 @@ class local_binary_atomic_op : PatFrag<(ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; @@ -366,7 +366,7 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }]>; multiclass AtomicCmpSwapLocal { @@ -376,7 +376,7 @@ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ AtomicSDNode *AN = cast(N); return AN->getMemoryVT() == MVT::i32 && - AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def _64_local : PatFrag< @@ -384,7 +384,7 @@ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ AtomicSDNode *AN = cast(N); return AN->getMemoryVT() == MVT::i64 && - AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; } @@ -394,17 +394,17 @@ def "" : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def _noret : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; def _ret : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; } defm atomic_swap_global : global_binary_atomic_op; @@ -422,22 +422,22 @@ def AMDGPUatomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$value), (AMDGPUatomic_cmp_swap node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def atomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def atomic_cmp_swap_global_noret : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; def atomic_cmp_swap_global_ret : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; //===----------------------------------------------------------------------===// // Misc Pattern Fragments Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -72,6 +72,7 @@ Module *Mod = nullptr; const DataLayout *DL = nullptr; MDNode *MaxWorkGroupSizeRange = nullptr; + AMDGPUAS AS; // FIXME: This should be per-kernel. uint32_t LocalMemLimit = 0; @@ -154,6 +155,7 @@ const AMDGPUSubtarget &ST = TM->getSubtarget(F); if (!ST.isPromoteAllocaEnabled()) return false; + AS = AMDGPU::getAMDGPUAS(*F.getParent()); FunctionType *FTy = F.getFunctionType(); @@ -162,7 +164,7 @@ // we cannot use local memory in the pass. for (Type *ParamTy : FTy->params()) { PointerType *PtrTy = dyn_cast(ParamTy); - if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { LocalMemLimit = 0; DEBUG(dbgs() << "Function has local memory argument. Promoting to " "local memory disabled.\n"); @@ -179,7 +181,7 @@ // Check how much local memory is being used by global objects CurrentLocalMemUsage = 0; for (GlobalVariable &GV : Mod->globals()) { - if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) continue; for (const User *U : GV.users()) { @@ -317,7 +319,7 @@ Type *I32Ty = Type::getInt32Ty(Mod->getContext()); Value *CastDispatchPtr = Builder.CreateBitCast( - DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS)); + DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS)); // We could do a single 64-bit load here, but it's likely that the basic // 32-bit and extract sequence is already present, and it is probably easier @@ -413,7 +415,7 @@ } } -static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { +static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { ArrayType *AllocaTy = dyn_cast(Alloca->getAllocatedType()); DEBUG(dbgs() << "Alloca candidate for vectorization\n"); @@ -468,7 +470,7 @@ IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(0); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -480,7 +482,7 @@ break; } case Instruction::Store: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(1); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -673,7 +675,7 @@ DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I)) { + if (tryPromoteAllocaToVector(&I, AS)) { DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); return; } @@ -734,7 +736,7 @@ Twine(F->getName()) + Twine('.') + I.getName(), nullptr, GlobalVariable::NotThreadLocal, - AMDGPUAS::LOCAL_ADDRESS); + AS.LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(I.getAlignment()); @@ -767,7 +769,7 @@ if (ICmpInst *CI = dyn_cast(V)) { Value *Src0 = CI->getOperand(0); Type *EltTy = Src0->getType()->getPointerElementType(); - PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); if (isa(CI->getOperand(0))) CI->setOperand(0, ConstantPointerNull::get(NewTy)); @@ -784,7 +786,7 @@ continue; Type *EltTy = V->getType()->getPointerElementType(); - PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); // FIXME: It doesn't really make sense to try to do this for all // instructions. @@ -852,7 +854,7 @@ Type *SrcTy = Src->getType()->getPointerElementType(); Function *ObjectSize = Intrinsic::getDeclaration(Mod, Intrinsic::objectsize, - { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) } + { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) } ); CallInst *NewCall = Builder.CreateCall( Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -157,6 +157,7 @@ InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; public: AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, @@ -214,6 +215,10 @@ return MaxPrivateElementSize; } + AMDGPUAS getAMDGPUAS() const { + return AS; + } + bool has16BitInsts() const { return Has16BitInsts; } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -135,6 +135,7 @@ FeatureDisable(false), InstrItins(getInstrItineraryForCPU(GPU)) { + AS = AMDGPU::getAMDGPUAS(TT); initializeSubtargetDependencies(TT, GPU, FS); } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -35,6 +35,7 @@ protected: std::unique_ptr TLOF; AMDGPUIntrinsicInfo IntrinsicInfo; + AMDGPUAS AS; StringRef getGPUName(const Function &F) const; StringRef getFeatureString(const Function &F) const; @@ -57,17 +58,16 @@ TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + AMDGPUAS getAMDGPUAS() const { + return AS; + } void adjustPassManager(PassManagerBuilder &) override; /// Get the integer value of a null pointer in the given address space. uint64_t getNullPointerValue(unsigned AddrSpace) const { - switch(AddrSpace) { - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS) return -1; - default: - return 0; - } + return 0; } }; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -240,6 +240,7 @@ : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), TLOF(createTLOF(getTargetTriple())) { + AS = AMDGPU::getAMDGPUAS(TT); initAsmInfo(); } @@ -809,3 +810,4 @@ TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { return new GCNPassConfig(this, PM); } + Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#include "AMDGPU.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetMachine.h" Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "llvm/MC/MCContext.h" @@ -22,7 +23,8 @@ MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) && + auto AS = static_cast(&TM)->getAMDGPUAS(); + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) && AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple())) return TextSection; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -110,7 +110,7 @@ if (IsGraphicsShader) return -1; return ST->hasFlatAddressSpace() ? - AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE; } unsigned getVectorSplitCost() { return 0; } Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -48,7 +48,7 @@ const DataLayout &DL = BB->getModule()->getDataLayout(); for (const Instruction &I : *BB) { const GetElementPtrInst *GEP = dyn_cast(&I); - if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS) continue; const Value *Ptr = GEP->getPointerOperand(); @@ -108,25 +108,24 @@ } unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { - switch (AddrSpace) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + AMDGPUAS AS = ST->getAMDGPUAS(); + if (AddrSpace == AS.GLOBAL_ADDRESS || + AddrSpace == AS.CONSTANT_ADDRESS || + AddrSpace == AS.FLAT_ADDRESS) return 128; - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || + AddrSpace == AS.REGION_ADDRESS) return 64; - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddrSpace == AS.PRIVATE_ADDRESS) return 8 * ST->getMaxPrivateElementSize(); - default: - if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && - (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || - AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || - (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && - AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) - return 128; - llvm_unreachable("unhandled address space"); - } + + if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && + (AddrSpace == AS.PARAM_D_ADDRESS || + AddrSpace == AS.PARAM_I_ADDRESS || + (AddrSpace >= AS.CONSTANT_BUFFER_0 && + AddrSpace <= AS.CONSTANT_BUFFER_15))) + return 128; + llvm_unreachable("unhandled address space"); } bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, @@ -135,7 +134,7 @@ // We allow vectorization of flat stores, even though we may need to decompose // them later if they may access private memory. We don't have enough context // here, and legalization can handle it. - if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { + if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) { return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) && ChainSizeInBytes <= ST->getMaxPrivateElementSize(); } @@ -362,7 +361,7 @@ // All other loads are not divergent, because if threads issue loads with the // same arguments, they will always get the same result. if (const LoadInst *Load = dyn_cast(V)) - return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS; // Atomics are divergent because they are executed sequentially: when an // atomic operation refers to the same address in each thread, then each Index: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td @@ -21,8 +21,8 @@ class MubufLoad : PatFrag < (ops node:$ptr), (op node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; def mubuf_load : MubufLoad ; Index: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td @@ -136,7 +136,7 @@ class flat_binary_atomic_op : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}] + [{return cast(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}] >; def atomic_cmp_swap_flat : flat_binary_atomic_op; @@ -284,16 +284,16 @@ class flat_ld : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; class flat_st : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS; }]>; def atomic_flat_load : flat_ld ; Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" @@ -36,6 +37,7 @@ class MetadataStreamer final { private: Metadata CodeObjectMetadata; + AMDGPUAS AMDGPUASI; void dump(StringRef YamlString) const; Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -264,20 +264,18 @@ AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( unsigned AddressSpace) const { - switch (AddressSpace) { - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) return AddressSpaceQualifier::Private; - case AMDGPUAS::GLOBAL_ADDRESS: + if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS) return AddressSpaceQualifier::Global; - case AMDGPUAS::CONSTANT_ADDRESS: + if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS) return AddressSpaceQualifier::Constant; - case AMDGPUAS::LOCAL_ADDRESS: + if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS) return AddressSpaceQualifier::Local; - case AMDGPUAS::FLAT_ADDRESS: + if (AddressSpace == AMDGPUASI.FLAT_ADDRESS) return AddressSpaceQualifier::Generic; - case AMDGPUAS::REGION_ADDRESS: + if (AddressSpace == AMDGPUASI.REGION_ADDRESS) return AddressSpaceQualifier::Region; - } llvm_unreachable("Unknown address space qualifier"); } @@ -304,7 +302,7 @@ "image3d_t", ValueKind::Image) .Default(isa(Ty) ? (Ty->getPointerAddressSpace() == - AMDGPUAS::LOCAL_ADDRESS ? + AMDGPUASI.LOCAL_ADDRESS ? ValueKind::DynamicSharedPointer : ValueKind::GlobalBuffer) : ValueKind::ByValue); @@ -460,7 +458,7 @@ return; auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), - AMDGPUAS::GLOBAL_ADDRESS); + AMDGPUASI.GLOBAL_ADDRESS); emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); } @@ -513,7 +511,7 @@ if (auto PtrTy = dyn_cast(Ty)) { auto ElTy = PtrTy->getElementType(); - if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized()) + if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized()) Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); } @@ -576,6 +574,7 @@ } void MetadataStreamer::begin(const Module &Mod) { + AMDGPUASI = getAMDGPUAS(Mod); emitVersion(); emitPrintf(Mod); } Index: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -557,7 +557,7 @@ } case Intrinsic::r600_implicitarg_ptr: { - MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); + MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS); uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); return DAG.getConstant(ByteOffset, DL, PtrVT); } @@ -707,12 +707,12 @@ SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); const DataLayout &DL = DAG.getDataLayout(); const GlobalValue *GV = GSD->getGlobal(); - MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); @@ -869,7 +869,7 @@ unsigned DwordOffset) const { unsigned ByteOffset = DwordOffset * 4; PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUASI.CONSTANT_BUFFER_0); // We shouldn't be using an offset wider than 16-bits for implicit parameters. assert(isInt<16>(ByteOffset)); @@ -1107,7 +1107,7 @@ //TODO: Who creates the i8 stores? assert(Store->isTruncatingStore() || Store->getValue().getValueType() == MVT::i8); - assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS); + assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS); SDValue Mask; if (Store->getMemoryVT() == MVT::i8) { @@ -1205,9 +1205,10 @@ SDLoc DL(Op); // Neither LOCAL nor PRIVATE can do vectors at the moment - if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { - if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + if ((AS == AMDGPUASI.PRIVATE_ADDRESS) && + StoreNode->isTruncatingStore()) { // Add an extra level of chain to isolate this vector SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); // TODO: can the chain be replaced without creating a new store? @@ -1230,7 +1231,7 @@ SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, DAG.getConstant(2, DL, PtrVT)); - if (AS == AMDGPUAS::GLOBAL_ADDRESS) { + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // It is beneficial to create MSKOR here instead of combiner to avoid // artificial dependencies introduced by RMW if (StoreNode->isTruncatingStore()) { @@ -1283,7 +1284,7 @@ } // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes - if (AS != AMDGPUAS::PRIVATE_ADDRESS) + if (AS != AMDGPUASI.PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) @@ -1302,39 +1303,39 @@ // return (512 + (kc_bank << 12) static int -ConstantAddressBlock(unsigned AddressSpace) { +ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) { switch (AddressSpace) { - case AMDGPUAS::CONSTANT_BUFFER_0: + case AMDGPUASI.CONSTANT_BUFFER_0: return 512; - case AMDGPUAS::CONSTANT_BUFFER_1: + case AMDGPUASI.CONSTANT_BUFFER_1: return 512 + 4096; - case AMDGPUAS::CONSTANT_BUFFER_2: + case AMDGPUASI.CONSTANT_BUFFER_2: return 512 + 4096 * 2; - case AMDGPUAS::CONSTANT_BUFFER_3: + case AMDGPUASI.CONSTANT_BUFFER_3: return 512 + 4096 * 3; - case AMDGPUAS::CONSTANT_BUFFER_4: + case AMDGPUASI.CONSTANT_BUFFER_4: return 512 + 4096 * 4; - case AMDGPUAS::CONSTANT_BUFFER_5: + case AMDGPUASI.CONSTANT_BUFFER_5: return 512 + 4096 * 5; - case AMDGPUAS::CONSTANT_BUFFER_6: + case AMDGPUASI.CONSTANT_BUFFER_6: return 512 + 4096 * 6; - case AMDGPUAS::CONSTANT_BUFFER_7: + case AMDGPUASI.CONSTANT_BUFFER_7: return 512 + 4096 * 7; - case AMDGPUAS::CONSTANT_BUFFER_8: + case AMDGPUASI.CONSTANT_BUFFER_8: return 512 + 4096 * 8; - case AMDGPUAS::CONSTANT_BUFFER_9: + case AMDGPUASI.CONSTANT_BUFFER_9: return 512 + 4096 * 9; - case AMDGPUAS::CONSTANT_BUFFER_10: + case AMDGPUASI.CONSTANT_BUFFER_10: return 512 + 4096 * 10; - case AMDGPUAS::CONSTANT_BUFFER_11: + case AMDGPUASI.CONSTANT_BUFFER_11: return 512 + 4096 * 11; - case AMDGPUAS::CONSTANT_BUFFER_12: + case AMDGPUASI.CONSTANT_BUFFER_12: return 512 + 4096 * 12; - case AMDGPUAS::CONSTANT_BUFFER_13: + case AMDGPUASI.CONSTANT_BUFFER_13: return 512 + 4096 * 13; - case AMDGPUAS::CONSTANT_BUFFER_14: + case AMDGPUASI.CONSTANT_BUFFER_14: return 512 + 4096 * 14; - case AMDGPUAS::CONSTANT_BUFFER_15: + case AMDGPUASI.CONSTANT_BUFFER_15: return 512 + 4096 * 15; default: return -1; @@ -1402,7 +1403,7 @@ EVT MemVT = LoadNode->getMemoryVT(); ISD::LoadExtType ExtType = LoadNode->getExtensionType(); - if (AS == AMDGPUAS::PRIVATE_ADDRESS && + if (AS == AMDGPUASI.PRIVATE_ADDRESS && ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { return lowerPrivateExtLoad(Op, DAG); } @@ -1412,13 +1413,14 @@ SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && + if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { return scalarizeVectorLoad(LoadNode, DAG); } - int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); + int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(), + AMDGPUASI); if (ConstantBlock > -1 && ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { @@ -1450,7 +1452,7 @@ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, DL, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - - AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32) + AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32) ); } @@ -1486,7 +1488,7 @@ return DAG.getMergeValues(MergedValues, DL); } - if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) { return SDValue(); } @@ -1563,7 +1565,7 @@ } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUASI.CONSTANT_BUFFER_0); // i64 isn't a legal type, so the register type used ends up as i32, which // isn't expected here. It attempts to create this sextload, but it ends up Index: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td @@ -316,7 +316,7 @@ class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isConstantLoad(cast(N), 0) || - (cast(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] + (cast(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }] >; def vtx_id3_az_extloadi8 : LoadParamFrag; @@ -326,8 +326,8 @@ class LoadVtxId1 : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && !isa(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout()))); }]>; @@ -339,7 +339,7 @@ class LoadVtxId2 : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && isa(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout())); }]>; Index: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -202,6 +202,7 @@ // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget(); + auto AMDGPUASI = ST.getAMDGPUAS(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -340,7 +341,7 @@ PointerType *PtrTy = PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), - AMDGPUAS::CONSTANT_ADDRESS); + AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); auto MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -597,8 +597,7 @@ if (AM.BaseGV) return false; - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // Assume the we will use FLAT for all global memory accesses // on VI. @@ -613,8 +612,7 @@ } return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::CONSTANT_ADDRESS: + } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? @@ -652,11 +650,10 @@ return false; - case AMDGPUAS::PRIVATE_ADDRESS: + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // Basic, single offset DS instructions allow a 16-bit unsigned immediate // field. // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have @@ -671,17 +668,15 @@ return true; return false; - - case AMDGPUAS::FLAT_ADDRESS: - case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: + } else if (AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) { // For an unknown address space, this usually means that this is for some // reason being used for pure arithmetic, and not based on some addressing // computation. We don't have instructions that compute pointers with any // addressing modes, so treat them as having no offset like flat // instructions. return isLegalFlatAddressingMode(AM); - - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -702,8 +697,8 @@ return false; } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || - AddrSpace == AMDGPUAS::REGION_ADDRESS) { + if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS || + AddrSpace == AMDGPUASI.REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. @@ -718,8 +713,8 @@ // will access scratch. If we had access to the IR function, then we // could determine if any private memory was used in the function. if (!Subtarget->hasUnalignedScratchAccess() && - (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || - AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { + (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS || + AddrSpace == AMDGPUASI.FLAT_ADDRESS)) { return false; } @@ -727,7 +722,7 @@ // If we have an uniform constant load, it still requires using a slow // buffer instruction if unaligned. if (IsFast) { - *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ? + *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ? (Align % 4 == 0) : true; } @@ -767,15 +762,16 @@ return MVT::Other; } -static bool isFlatGlobalAddrSpace(unsigned AS) { - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; +static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) { + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; } bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); + return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) && + isFlatGlobalAddrSpace(DestAS, AMDGPUASI); } bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { @@ -789,7 +785,7 @@ unsigned DestAS) const { // Flat -> private/local is a simple truncate. // Flat -> global is no-op - if (SrcAS == AMDGPUAS::FLAT_ADDRESS) + if (SrcAS == AMDGPUASI.FLAT_ADDRESS) return true; return isNoopAddrSpaceCast(SrcAS, DestAS); @@ -850,7 +846,7 @@ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, @@ -863,7 +859,7 @@ const ISD::InputArg *Arg) const { const DataLayout &DL = DAG.getDataLayout(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned Align = DL.getABITypeAlignment(Ty); @@ -1073,7 +1069,7 @@ auto *ParamTy = dyn_cast(FType->getParamType(Ins[i].getOrigArgIndex())); if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && - ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { // On SI local pointers are just offsets into LDS, so they are always // less than 16-bits. On CI and newer they could potentially be // real pointers, so we can't guarantee their size. @@ -2206,13 +2202,13 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && AMDGPU::shouldEmitConstantsToTextSection(TT); } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { - return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitFixup(GV) && !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); } @@ -2351,7 +2347,7 @@ SelectionDAG &DAG) const { if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly. - unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : + unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : AMDGPU::SRC_PRIVATE_BASE; return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32); } @@ -2367,7 +2363,7 @@ // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. - uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44; + uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44; SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr, DAG.getConstant(StructOffset, SL, MVT::i64)); @@ -2376,7 +2372,7 @@ // TODO: We should use the value from the IR intrinsic call, but it might not // be available and how do we get it? Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()), - AMDGPUAS::CONSTANT_ADDRESS)); + AMDGPUASI.CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(V, StructOffset); return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo, @@ -2397,9 +2393,11 @@ static_cast(getTargetMachine()); // flat -> local/private - if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned DestAS = ASC->getDestAddressSpace(); - if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) { + + if (DestAS == AMDGPUASI.LOCAL_ADDRESS || + DestAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(DestAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE); @@ -2411,9 +2409,11 @@ } // local/private -> flat - if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned SrcAS = ASC->getSrcAddressSpace(); - if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { + + if (SrcAS == AMDGPUASI.LOCAL_ADDRESS || + SrcAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(SrcAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); @@ -2513,8 +2513,8 @@ bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // We can fold offsets for anything that doesn't require a GOT relocation. - return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitGOTReloc(GA->getGlobal()); } @@ -2565,8 +2565,8 @@ SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && - GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); SDLoc DL(GSD); @@ -2583,7 +2583,7 @@ SIInstrInfo::MO_GOTPCREL32); Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); const DataLayout &DataLayout = DAG.getDataLayout(); unsigned Align = DataLayout.getABITypeAlignment(PtrTy); // FIXME: Use a PseudoSourceValue once those can be assigned an address space. @@ -3229,21 +3229,20 @@ SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? - AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = MemVT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::CONSTANT_ADDRESS: + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { if (isMemOpUniform(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::GLOBAL_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); @@ -3251,13 +3250,15 @@ // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::FLAT_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorLoad(Op, DAG); // v4 loads are supported for private and global memory. return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: + } + if (AS == AMDGPUASI.PRIVATE_ADDRESS) { // Depending on the setting of the private_element_size field in the // resource descriptor, we can only make private accesses up to a certain // size. @@ -3276,7 +3277,7 @@ default: llvm_unreachable("unsupported private_element_size"); } - case AMDGPUAS::LOCAL_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorLoad(Op, DAG); @@ -3285,9 +3286,8 @@ // If properly aligned, if we split we might be able to use ds_read_b64. return SplitVectorLoad(Op, DAG); - default: - return SDValue(); } + return SDValue(); } SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -3656,18 +3656,17 @@ SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? - AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = VT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorStore(Op, DAG); return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: { + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { case 4: return scalarizeVectorStore(Store, DAG); @@ -3682,8 +3681,7 @@ default: llvm_unreachable("unsupported private_element_size"); } - } - case AMDGPUAS::LOCAL_ADDRESS: { + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorStore(Op, DAG); @@ -3692,8 +3690,7 @@ // If properly aligned, if we split we might be able to use ds_write_b64. return SplitVectorStore(Op, DAG); - } - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -3724,7 +3721,7 @@ unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS)) + if (!isFlatGlobalAddrSpace(AS, AMDGPUASI)) return Op; // Non-local address space requires custom lowering for atomic compare @@ -3781,26 +3778,26 @@ /// the immediate offsets of a memory instruction for the given address space. static bool canFoldOffset(unsigned OffsetSize, unsigned AS, const SISubtarget &STI) { - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + auto AMDGPUASI = STI.getAMDGPUAS(); + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // MUBUF instructions a 12-bit offset in bytes. return isUInt<12>(OffsetSize); - case AMDGPUAS::CONSTANT_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // SMRD instructions have an 8-bit offset in dwords on SI and // a 20-bit offset in bytes on VI. if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) return isUInt<20>(OffsetSize); else return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } + if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // The single offset versions have a 16-bit offset in bytes. return isUInt<16>(OffsetSize); - case AMDGPUAS::PRIVATE_ADDRESS: - // Indirect register addressing does not use any offsets. - default: - return false; } + // Indirect register addressing does not use any offsets. + return false; } // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2) @@ -3858,7 +3855,7 @@ // TODO: We could also do this for multiplies. unsigned AS = N->getAddressSpace(); - if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) { + if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) { SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI); if (NewPtr) { SmallVector NewOps(N->op_begin(), N->op_end()); Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3747,7 +3747,7 @@ return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && - (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); + (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -3854,7 +3854,7 @@ return true; for (const MachineMemOperand *MMO : MI.memoperands()) { - if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) + if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) return true; } return false; Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -107,7 +107,7 @@ >; def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{ @@ -144,7 +144,7 @@ def si_st_local : PatFrag < (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def si_store_local : PatFrag < Index: llvm/trunk/lib/Target/AMDGPU/SMInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SMInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SMInstructions.td @@ -226,9 +226,9 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast(N); return Ld->getAlignment() >= 4 && - ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast(getTargetLowering())->isMemOpUniform(N)) || - (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && static_cast(getTargetLowering())->isMemOpUniform(N) && static_cast(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H +#include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" @@ -160,9 +161,9 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); -bool isGroupSegment(const GlobalValue *GV); -bool isGlobalSegment(const GlobalValue *GV); -bool isReadOnlySegment(const GlobalValue *GV); +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS); /// \returns True if constants should be emitted to .text section for given /// target triple \p TT, false otherwise. Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -354,16 +355,16 @@ ELF::SHF_AMDGPU_HSA_AGENT); } -bool isGroupSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } -bool isGlobalSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS; } -bool isReadOnlySegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS; } bool shouldEmitConstantsToTextSection(const Triple &TT) { @@ -736,6 +737,60 @@ return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); } - } // end namespace AMDGPU + } // end namespace llvm + +const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; +const unsigned AMDGPUAS::GLOBAL_ADDRESS; +const unsigned AMDGPUAS::LOCAL_ADDRESS; +const unsigned AMDGPUAS::PARAM_D_ADDRESS; +const unsigned AMDGPUAS::PARAM_I_ADDRESS; +const unsigned AMDGPUAS::CONSTANT_BUFFER_0; +const unsigned AMDGPUAS::CONSTANT_BUFFER_1; +const unsigned AMDGPUAS::CONSTANT_BUFFER_2; +const unsigned AMDGPUAS::CONSTANT_BUFFER_3; +const unsigned AMDGPUAS::CONSTANT_BUFFER_4; +const unsigned AMDGPUAS::CONSTANT_BUFFER_5; +const unsigned AMDGPUAS::CONSTANT_BUFFER_6; +const unsigned AMDGPUAS::CONSTANT_BUFFER_7; +const unsigned AMDGPUAS::CONSTANT_BUFFER_8; +const unsigned AMDGPUAS::CONSTANT_BUFFER_9; +const unsigned AMDGPUAS::CONSTANT_BUFFER_10; +const unsigned AMDGPUAS::CONSTANT_BUFFER_11; +const unsigned AMDGPUAS::CONSTANT_BUFFER_12; +const unsigned AMDGPUAS::CONSTANT_BUFFER_13; +const unsigned AMDGPUAS::CONSTANT_BUFFER_14; +const unsigned AMDGPUAS::CONSTANT_BUFFER_15; +const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + +namespace llvm { +namespace AMDGPU { + +AMDGPUAS getAMDGPUAS(Triple T) { + auto Env = T.getEnvironmentName(); + AMDGPUAS AS; + if (Env == "amdgiz" || Env == "amdgizcl") { + AS.FLAT_ADDRESS = 0; + AS.CONSTANT_ADDRESS = 4; + AS.PRIVATE_ADDRESS = 5; + AS.REGION_ADDRESS = 2; + } + else { + AS.FLAT_ADDRESS = 4; + AS.CONSTANT_ADDRESS = 2; + AS.PRIVATE_ADDRESS = 0; + AS.REGION_ADDRESS = 5; + } + return AS; +} + +AMDGPUAS getAMDGPUAS(const TargetMachine &M) { + return getAMDGPUAS(M.getTargetTriple()); +} + +AMDGPUAS getAMDGPUAS(const Module &M) { + return getAMDGPUAS(Triple(M.getTargetTriple())); +} +} // namespace AMDGPU +} // namespace llvm