Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -24,6 +24,7 @@
 class Target;
 class TargetMachine;
 class PassRegistry;
+class Module;
 
 // R600 Passes
 FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@@ -167,44 +168,54 @@
 /// however on the GPU, each address space points to
 /// a separate piece of memory that is unique from other
 /// memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces : unsigned {
-  PRIVATE_ADDRESS  = 0, ///< Address space for private memory.
-  GLOBAL_ADDRESS   = 1, ///< Address space for global memory (RAT0, VTX0).
-  CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
-  LOCAL_ADDRESS    = 3, ///< Address space for local memory.
-  FLAT_ADDRESS     = 4, ///< Address space for flat memory.
-  REGION_ADDRESS   = 5, ///< Address space for region memory.
-  PARAM_D_ADDRESS  = 6, ///< Address space for direct addressible parameter memory (CONST0)
-  PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+struct AMDGPUAS {
+  // The following address space values depend on the triple environment.
+  unsigned PRIVATE_ADDRESS;  ///< Address space for private memory.
+  unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2)
+  unsigned FLAT_ADDRESS;     ///< Address space for flat memory.
+  unsigned REGION_ADDRESS;   ///< Address space for region memory.
+
+  // The maximum value for flat, generic, local, private, constant and region.
+  const static unsigned MAX_COMMON_ADDRESS = 6;
+
+  const static unsigned GLOBAL_ADDRESS = 1;  ///< Address space for global memory (RAT0, VTX0).
+  const static unsigned LOCAL_ADDRESS = 3;   ///< Address space for local memory.
+  const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressable parameter memory (CONST0)
+  const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressable parameter memory (VTX1)
 
   // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
   // order to be able to dynamically index a constant buffer, for example:
   //
   // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
-  CONSTANT_BUFFER_0 = 8,
-  CONSTANT_BUFFER_1 = 9,
-  CONSTANT_BUFFER_2 = 10,
-  CONSTANT_BUFFER_3 = 11,
-  CONSTANT_BUFFER_4 = 12,
-  CONSTANT_BUFFER_5 = 13,
-  CONSTANT_BUFFER_6 = 14,
-  CONSTANT_BUFFER_7 = 15,
-  CONSTANT_BUFFER_8 = 16,
-  CONSTANT_BUFFER_9 = 17,
-  CONSTANT_BUFFER_10 = 18,
-  CONSTANT_BUFFER_11 = 19,
-  CONSTANT_BUFFER_12 = 20,
-  CONSTANT_BUFFER_13 = 21,
-  CONSTANT_BUFFER_14 = 22,
-  CONSTANT_BUFFER_15 = 23,
+  const static unsigned CONSTANT_BUFFER_0 = 8;
+  const static unsigned CONSTANT_BUFFER_1 = 9;
+  const static unsigned CONSTANT_BUFFER_2 = 10;
+  const static unsigned CONSTANT_BUFFER_3 = 11;
+  const static unsigned CONSTANT_BUFFER_4 = 12;
+  const static unsigned CONSTANT_BUFFER_5 = 13;
+  const static unsigned CONSTANT_BUFFER_6 = 14;
+  const static unsigned CONSTANT_BUFFER_7 = 15;
+  const static unsigned CONSTANT_BUFFER_8 = 16;
+  const static unsigned CONSTANT_BUFFER_9 = 17;
+  const static unsigned CONSTANT_BUFFER_10 = 18;
+  const static unsigned CONSTANT_BUFFER_11 = 19;
+  const static unsigned CONSTANT_BUFFER_12 = 20;
+  const static unsigned CONSTANT_BUFFER_13 = 21;
+  const static unsigned CONSTANT_BUFFER_14 = 22;
+  const static unsigned CONSTANT_BUFFER_15 = 23;
 
   // Some places use this if the address space can't be determined.
-  UNKNOWN_ADDRESS_SPACE = ~0u
+  const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
 };
-} // namespace AMDGPUAS
+namespace llvm {
+namespace AMDGPU {
+AMDGPUAS getAMDGPUAS(const Module &M);
+AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
+AMDGPUAS getAMDGPUAS(Triple T);
+} // namespace AMDGPU
+} // namespace llvm
 
 /// AMDGPU-specific synchronization scopes.
 enum class AMDGPUSynchronizationScope : uint8_t {
Index: lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
 #define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
 
+#include "AMDGPU.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
@@ -25,11 +26,14 @@
   friend AAResultBase<AMDGPUAAResult>;
 
   const DataLayout &DL;
+  AMDGPUAS AS;
 
 public:
-  explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {}
+  explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
+    DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {}
   AMDGPUAAResult(AMDGPUAAResult &&Arg)
-      : AAResultBase(std::move(Arg)), DL(Arg.DL){}
+      : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
+        ASAliasRules(Arg.ASAliasRules){}
 
   /// Handle invalidation events from the new pass manager.
   ///
@@ -42,6 +46,15 @@
 private:
   bool Aliases(const MDNode *A, const MDNode *B) const;
   bool PathAliases(const MDNode *A, const MDNode *B) const;
+
+  class ASAliasRulesTy {
+  public:
+    ASAliasRulesTy(AMDGPUAS AS);
+    ASAliasRulesTy(ASAliasRulesTy &R):ASToIndex(R.ASToIndex){}
+    AliasResult getAliasResult(unsigned AS1, unsigned AS2);
+  private:
+    DenseMap<unsigned, unsigned> ASToIndex;
+  } ASAliasRules;
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
@@ -53,7 +66,8 @@
   typedef AMDGPUAAResult Result;
 
   AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
-    return AMDGPUAAResult(F.getParent()->getDataLayout());
+    return AMDGPUAAResult(F.getParent()->getDataLayout(),
+        Triple(F.getParent()->getTargetTriple()));
   }
 };
 
@@ -72,7 +86,8 @@
   const AMDGPUAAResult &getResult() const { return *Result; }
 
   bool doInitialization(Module &M) override {
-    Result.reset(new AMDGPUAAResult(M.getDataLayout()));
+    Result.reset(new AMDGPUAAResult(M.getDataLayout(),
+        Triple(M.getTargetTriple())));
     return false;
   }
   bool doFinalization(Module &M) override {
Index: lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -37,26 +37,48 @@
   AU.setPreservesAll();
 }
 
-AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
-                                  const MemoryLocation &LocB) {
+// Must match the table in getAliasResult.
+AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS) {
+  ASToIndex[AS.PRIVATE_ADDRESS] = 0;
+  ASToIndex[AS.GLOBAL_ADDRESS] = 1;
+  ASToIndex[AS.CONSTANT_ADDRESS] = 2;
+  ASToIndex[AS.LOCAL_ADDRESS] = 3;
+  ASToIndex[AS.FLAT_ADDRESS] = 4;
+}
+
+AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
+    unsigned AS2) {
   // This array is indexed by the AMDGPUAS::AddressSpaces
   // enum elements PRIVATE_ADDRESS ... to FLAT_ADDRESS
   // see "llvm/Transforms/AMDSPIRUtils.h"
+  // The order must match ASToIndex.
   static const AliasResult ASAliasRules[5][5] = {
-  /*             Private    Global    Constant  Group     Flat */
-  /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
-  /* Global   */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
-  /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
-  /* Group    */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
-  /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
+    /*             Private    Global    Constant  Group     Flat */
+    /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias},
+    /* Global   */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
+    /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
+    /* Group    */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
+    /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}
   };
+  auto Index1 = ASToIndex[AS1];
+  auto Index2 = ASToIndex[AS2];
+  assert(0 <= Index1 && Index1 < 5 && 0 <= Index2 && Index2 < 5);
+  return ASAliasRules[Index1][Index2];
+}
+
+AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
+                                  const MemoryLocation &LocB) {
   unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
   unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
 
-  if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS ||
-      asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS)
+  assert(AS.FLAT_ADDRESS <= AS.MAX_COMMON_ADDRESS &&
+         AS.GLOBAL_ADDRESS <= AS.MAX_COMMON_ADDRESS &&
+         AS.LOCAL_ADDRESS <= AS.MAX_COMMON_ADDRESS &&
+         AS.CONSTANT_ADDRESS <= AS.MAX_COMMON_ADDRESS &&
+         AS.PRIVATE_ADDRESS <= AS.MAX_COMMON_ADDRESS);
+  if (asA > AS.MAX_COMMON_ADDRESS || asB > AS.MAX_COMMON_ADDRESS)
     report_fatal_error("Pointer address space out of range");
 
-  AliasResult Result = ASAliasRules[asA][asB];
+  AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
   if (Result == NoAlias) return Result;
 
   if (isa(LocA.Ptr) && isa(LocB.Ptr)) {
@@ -75,8 +97,7 @@
                                            bool OrLocal) {
   const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
 
-  if (Base->getType()->getPointerAddressSpace() ==
-      AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) {
+  if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
     return true;
   }
Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -28,7 +28,8 @@
 class AMDGPUAnnotateKernelFeatures : public ModulePass {
 private:
   const TargetMachine *TM;
-  static bool hasAddrSpaceCast(const Function &F);
+  AMDGPUAS AS;
+  static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
 
   void addAttrToCallers(Function *Intrin, StringRef AttrName);
   bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
@@ -48,10 +49,11 @@
     ModulePass::getAnalysisUsage(AU);
   }
 
-  static bool visitConstantExpr(const ConstantExpr *CE);
+  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
   static bool visitConstantExprsRecursively(
     const Constant *EntryC,
-    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
+    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+    AMDGPUAS AS);
 };
 
 }
 
@@ -65,18 +67,20 @@
 
 // The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
-  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
+  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
 }
 
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
-  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
+    const AMDGPUAS &AS) {
+  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
 }
 
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
+    AMDGPUAS AS) {
   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
-    return castRequiresQueuePtr(SrcAS);
+    return castRequiresQueuePtr(SrcAS, AS);
   }
 
   return false;
@@ -84,7 +88,8 @@
 
 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
   const Constant *EntryC,
-  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
+  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+  AMDGPUAS AS) {
 
   if (!ConstantExprVisited.insert(EntryC).second)
     return false;
@@ -97,7 +102,7 @@
 
     // Check this constant expression.
     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
-      if (visitConstantExpr(CE))
+      if (visitConstantExpr(CE, AS))
         return true;
     }
@@ -118,13 +123,14 @@
 }
 
 // Return true if an addrspacecast is used that requires the queue ptr.
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
+bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
+    AMDGPUAS AS) {
   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
 
   for (const BasicBlock &BB : F) {
     for (const Instruction &I : BB) {
       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
-        if (castRequiresQueuePtr(ASC))
+        if (castRequiresQueuePtr(ASC, AS))
           return true;
       }
 
@@ -133,7 +139,7 @@
       if (!OpC)
         continue;
 
-      if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
+      if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
         return true;
     }
   }
@@ -173,6 +179,7 @@
 
 bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
   Triple TT(M.getTargetTriple());
+  AS = AMDGPU::getAMDGPUAS(M);
 
   static const StringRef IntrinsicToAttr[][2] = {
     // .x omitted
@@ -216,7 +223,7 @@
       bool HasApertureRegs =
         TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
 
-      if (!HasApertureRegs && hasAddrSpaceCast(F))
+      if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
         F.addFnAttr("amdgpu-queue-ptr");
     }
   }
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 
+#include "AMDGPU.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include <vector>
@@ -142,6 +143,7 @@
 protected:
   std::vector<std::string> DisasmLines, HexLines;
   size_t DisasmLineMaxLen;
+  AMDGPUAS AMDGPUASI;
 };
 
 } // end namespace llvm
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,7 @@
 //
 
 #include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "InstPrinter/AMDGPUInstPrinter.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -93,7 +94,9 @@
 
 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
-  : AsmPrinter(TM, std::move(Streamer)) {}
+  : AsmPrinter(TM, std::move(Streamer)) {
+    AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+  }
 
 StringRef AMDGPUAsmPrinter::getPassName() const {
   return "AMDGPU Assembly Printer";
@@ -159,7 +162,7 @@
 
 void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   // Group segment variables aren't emitted in HSA.
-  if (AMDGPU::isGroupSegment(GV))
+  if (AMDGPU::isGroupSegment(GV, AMDGPUASI))
     return;
 
   AsmPrinter::EmitGlobalVariable(GV);
Index: lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
 
+#include "AMDGPU.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 
 namespace llvm {
@@ -22,6 +23,7 @@
 class AMDGPUTargetLowering;
 
 class AMDGPUCallLowering: public CallLowering {
+  AMDGPUAS AMDGPUASI;
 
   unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
                              unsigned Offset) const;
Index: lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -31,7 +31,7 @@
 #endif
 
 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
-  : CallLowering(&TLI) {
+  : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
 }
 
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -49,7 +49,7 @@
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = *MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
-  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
+  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
   LLT PtrType = getLLTForType(*PtrTy, DL);
   unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
   unsigned KernArgSegmentPtr =
@@ -70,7 +70,7 @@
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = *MF.getFunction();
   const DataLayout &DL = F.getParent()->getDataLayout();
-  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
+  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
   MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
   unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
   unsigned Align = DL.getABITypeAlignment(ParamTy);
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -67,10 +67,13 @@
   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
   // make the right decision when generating code for different targets.
   const AMDGPUSubtarget *Subtarget;
+  AMDGPUAS AMDGPUASI;
 
 public:
   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
-    : SelectionDAGISel(TM, OptLevel) {}
+      : SelectionDAGISel(TM, OptLevel){
+    AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
+  }
   ~AMDGPUDAGToDAGISel() override = default;
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -586,7 +589,7 @@
   if (!N->readMem())
     return false;
   if (CbId == -1)
-    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
 
   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
 }
@@ -1536,7 +1539,7 @@
 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
   MemSDNode *Mem = cast<MemSDNode>(N);
   unsigned AS = Mem->getAddressSpace();
-  if (AS == AMDGPUAS::FLAT_ADDRESS) {
+  if (AS == AMDGPUASI.FLAT_ADDRESS) {
     SelectCode(N);
     return;
   }
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
 
+#include "AMDGPU.h"
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
@@ -34,6 +35,7 @@
 
 protected:
   const AMDGPUSubtarget *Subtarget;
+  AMDGPUAS AMDGPUASI;
 
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
@@ -224,6 +226,10 @@
   /// type of implicit parameter.
   uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
                                       const ImplicitParameter Param) const;
+
+  AMDGPUAS getAMDGPUAS() const {
+    return AMDGPUASI;
+  }
 };
 
 namespace AMDGPUISD {
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -59,6 +59,7 @@
 AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
                                            const AMDGPUSubtarget &STI)
   : TargetLowering(TM), Subtarget(&STI) {
+  AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
   // Lower floating point store/load to integer store/load to reduce the number
   // of patterns in tablegen.
   setOperationAction(ISD::LOAD, MVT::f32, Promote);
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
 
+#include "AMDGPU.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 
@@ -35,6 +36,8 @@
   const AMDGPUSubtarget &ST;
 
   virtual void anchor();
+protected:
+  AMDGPUAS AMDGPUASI;
 
 public:
   explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -30,7 +30,7 @@
 void AMDGPUInstrInfo::anchor() {}
 
 AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
-  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
+  : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {}
 
 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
 // the first 16 loads will be interleaved with the stores, and the next 16 will
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
 
+#include "AMDGPU.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
@@ -35,7 +36,6 @@
                             const AMDGPURegisterBankInfo &RBI);
 
   bool select(MachineInstr &I) const override;
-
 private:
   struct GEPInfo {
     const MachineInstr &GEP;
@@ -59,6 +59,8 @@
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
+protected:
+  AMDGPUAS AMDGPUASI;
 };
 
 } // End llvm namespace.
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -33,7 +33,7 @@
 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
     const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
     : InstructionSelector(), TII(*STI.getInstrInfo()),
-      TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+      TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
 
 MachineOperand
 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
@@ -291,7 +291,7 @@
   if (!I.hasOneMemOperand())
     return false;
 
-  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
     return false;
 
   if (!isInstrUniform(I))
Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -204,7 +204,7 @@
 //===----------------------------------------------------------------------===//
 
 class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
 }]>;
 
 class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
@@ -222,7 +222,7 @@
 def store_private : PrivateStore <store>;
 
 class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
 }]>;
 
 // Global address space loads
@@ -242,7 +242,7 @@
 
 class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
 }]>;
 
 // Constant address space loads
@@ -253,7 +253,7 @@
 def constant_load : ConstantLoad<load>;
 
 class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
 }]>;
 
 // Local address space loads
@@ -266,7 +266,7 @@
 >;
 
 class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS;
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;
 }]>;
 
 class FlatLoad <SDPatternOperator op> : FlatMemOp <
Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -72,6 +72,7 @@
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
   MDNode *MaxWorkGroupSizeRange = nullptr;
+  AMDGPUAS AS;
 
   // FIXME: This should be per-kernel.
uint32_t LocalMemLimit = 0; @@ -154,6 +155,7 @@ const AMDGPUSubtarget &ST = TM->getSubtarget(F); if (!ST.isPromoteAllocaEnabled()) return false; + AS = AMDGPU::getAMDGPUAS(*F.getParent()); FunctionType *FTy = F.getFunctionType(); @@ -317,7 +319,7 @@ Type *I32Ty = Type::getInt32Ty(Mod->getContext()); Value *CastDispatchPtr = Builder.CreateBitCast( - DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS)); + DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS)); // We could do a single 64-bit load here, but it's likely that the basic // 32-bit and extract sequence is already present, and it is probably easier @@ -413,7 +415,7 @@ } } -static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { +static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { ArrayType *AllocaTy = dyn_cast(Alloca->getAllocatedType()); DEBUG(dbgs() << "Alloca candidate for vectorization\n"); @@ -468,7 +470,7 @@ IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(0); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -480,7 +482,7 @@ break; } case Instruction::Store: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(1); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -673,7 +675,7 @@ DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I)) { + if (tryPromoteAllocaToVector(&I, AS)) { DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); return; } Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -156,6 +156,7 @@ InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; public: AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, @@ -213,6 +214,10 @@ return MaxPrivateElementSize; } + AMDGPUAS getAMDGPUAS() const { + return AS; + } + bool has16BitInsts() const { return Has16BitInsts; } Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -135,6 +135,7 @@ FeatureDisable(false), InstrItins(getInstrItineraryForCPU(GPU)) { + AS = AMDGPU::getAMDGPUAS(TT); initializeSubtargetDependencies(TT, GPU, FS); } Index: lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -35,6 +35,7 @@ protected: std::unique_ptr TLOF; AMDGPUIntrinsicInfo IntrinsicInfo; + AMDGPUAS AS; StringRef getGPUName(const Function &F) const; StringRef getFeatureString(const Function &F) const; @@ -57,19 +58,18 @@ TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + AMDGPUAS getAMDGPUAS() const { + return AS; + } void addPreLinkPasses(PassManagerBase & PM) override; void adjustPassManager(PassManagerBuilder &) override; /// Get the integer value of a null pointer in the given address space. 
uint64_t getNullPointerValue(unsigned AddrSpace) const { - switch(AddrSpace) { - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS) return -1; - default: - return 0; - } + return 0; } }; Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -214,6 +214,7 @@ : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), TLOF(createTLOF(getTargetTriple())) { + AS = AMDGPU::getAMDGPUAS(TT); initAsmInfo(); } @@ -762,3 +763,4 @@ TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { return new GCNPassConfig(this, PM); } + Index: lib/Target/AMDGPU/AMDGPUTargetObjectFile.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetObjectFile.h +++ lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#include "AMDGPU.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetMachine.h" Index: lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "llvm/MC/MCContext.h" @@ -22,7 +23,8 @@ MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) && + auto AS = static_cast(&TM)->getAMDGPUAS(); + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) && AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple())) return TextSection; Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -110,7 +110,7 @@ if (IsGraphicsShader) return -1; return ST->hasFlatAddressSpace() ? 
- AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE; } unsigned getVectorSplitCost() { return 0; } Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -48,7 +48,7 @@ const DataLayout &DL = BB->getModule()->getDataLayout(); for (const Instruction &I : *BB) { const GetElementPtrInst *GEP = dyn_cast(&I); - if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS) continue; const Value *Ptr = GEP->getPointerOperand(); @@ -108,25 +108,24 @@ } unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { - switch (AddrSpace) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + AMDGPUAS AS = ST->getAMDGPUAS(); + if (AddrSpace == AS.GLOBAL_ADDRESS || + AddrSpace == AS.CONSTANT_ADDRESS || + AddrSpace == AS.FLAT_ADDRESS) return 128; - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || + AddrSpace == AS.REGION_ADDRESS) return 64; - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddrSpace == AS.PRIVATE_ADDRESS) return 8 * ST->getMaxPrivateElementSize(); - default: - if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && - (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || - AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || - (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && - AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) - return 128; - llvm_unreachable("unhandled address space"); - } + + if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && + (AddrSpace == AS.PARAM_D_ADDRESS || + AddrSpace == AS.PARAM_I_ADDRESS || + (AddrSpace >= AS.CONSTANT_BUFFER_0 && + AddrSpace <= AS.CONSTANT_BUFFER_15))) + return 128; + llvm_unreachable("unhandled address space"); } bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, @@ -135,7 +134,7 @@ // We allow vectorization of flat stores, even though we may need to decompose // them later if they may access private memory. We don't have enough context // here, and legalization can handle it. - if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { + if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) { return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) && ChainSizeInBytes <= ST->getMaxPrivateElementSize(); } @@ -362,7 +361,7 @@ // All other loads are not divergent, because if threads issue loads with the // same arguments, they will always get the same result. 
if (const LoadInst *Load = dyn_cast(V)) - return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS; // Atomics are divergent because they are executed sequentially: when an // atomic operation refers to the same address in each thread, then each Index: lib/Target/AMDGPU/AMDGPUclpVectorExpansion.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUclpVectorExpansion.cpp +++ lib/Target/AMDGPU/AMDGPUclpVectorExpansion.cpp @@ -448,6 +448,7 @@ typedef StringMap an_expansionInfo_t; an_expansionInfo_t ExpansionInfo; std::unique_ptr TempModule; + AMDGPUAS AS; char getAddrSpaceCode(StringRef); std::string getNextFunctionName(StringRef, an_typedes_t, int, char); @@ -555,12 +556,12 @@ size_t SeparatorPos = CurFuncName.rfind("AS"); assert(SeparatorPos != StringRef::npos); AddrSpaceCode = CurFuncName[SeparatorPos + 2]; - assert('0' <= AddrSpaceCode && AddrSpaceCode <= '4'); + assert('0' <= AddrSpaceCode && AddrSpaceCode <= '5'); } // make sure it's a pointer to private address space else { assert(CurFuncName.rfind('P') != StringRef::npos); - AddrSpaceCode = (AMDGPUAS::PRIVATE_ADDRESS + '0'); + AddrSpaceCode = '0'; } return AddrSpaceCode; } // AMDGPUclpVectorExpansion::getAddrSpaceCode @@ -1045,6 +1046,7 @@ /// process an LLVM module to perform expand vector builtin calls /// bool AMDGPUclpVectorExpansion::runOnModule(Module &TheModule) { + AS = AMDGPU::getAMDGPUAS(TheModule); TempModule.reset( new Module("__opencllib_vectorexpansion", TheModule.getContext())); TempModule->setDataLayout(TheModule.getDataLayout()); Index: lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- lib/Target/AMDGPU/BUFInstructions.td +++ lib/Target/AMDGPU/BUFInstructions.td @@ -21,8 +21,8 @@ class MubufLoad : PatFrag < (ops node:$ptr), (op node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; def mubuf_load : MubufLoad ; Index: lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- lib/Target/AMDGPU/FLATInstructions.td +++ lib/Target/AMDGPU/FLATInstructions.td @@ -136,7 +136,7 @@ class flat_binary_atomic_op : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}] + [{return cast(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}] >; def atomic_cmp_swap_flat : flat_binary_atomic_op; @@ -284,16 +284,16 @@ class flat_ld : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; class flat_st : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS; }]>; def atomic_flat_load : flat_ld ; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ 
lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp @@ -208,26 +208,24 @@ } } -static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( - AMDGPUAS::AddressSpaces A) { - switch (A) { - case AMDGPUAS::GLOBAL_ADDRESS: +static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(unsigned A, + AMDGPUAS AS) { + if (A == AS.GLOBAL_ADDRESS) return KernelArg::Global; - case AMDGPUAS::CONSTANT_ADDRESS: + else if (A == AS.CONSTANT_ADDRESS) return KernelArg::Constant; - case AMDGPUAS::LOCAL_ADDRESS: + else if (A == AS.LOCAL_ADDRESS) return KernelArg::Local; - case AMDGPUAS::FLAT_ADDRESS: + else if (A == AS.FLAT_ADDRESS) return KernelArg::Generic; - case AMDGPUAS::REGION_ADDRESS: + else if (A == AS.REGION_ADDRESS) return KernelArg::Region; - default: + else return KernelArg::Private; - } } static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL, - Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "", + AMDGPUAS AS, Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "", StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "", StringRef AccQual = "") { KernelArg::Metadata Arg; @@ -279,14 +277,14 @@ // Set ArgAddrQual. if (auto *PT = dyn_cast(T)) { - Arg.AddrQual = getRuntimeAddrSpace(static_cast( - PT->getAddressSpace())); + Arg.AddrQual = getRuntimeAddrSpace(PT->getAddressSpace(), AS); } return Arg; } -static Kernel::Metadata getRuntimeMDForKernel(const Function &F) { +static Kernel::Metadata getRuntimeMDForKernel(const Function &F, + AMDGPUAS AS) { Kernel::Metadata Kernel; Kernel.Name = F.getName(); auto &M = *F.getParent(); @@ -340,23 +338,23 @@ KernelArg::DynamicSharedPointer : KernelArg::GlobalBuffer) : KernelArg::ByValue); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind, + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, AS, T, Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual)); } // Emit hidden kernel arguments for OpenCL kernels. 
if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { auto Int64T = Type::getInt64Ty(F.getContext()); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, AS, Int64T, KernelArg::HiddenGlobalOffsetX)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, AS, Int64T, KernelArg::HiddenGlobalOffsetY)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, AS, Int64T, KernelArg::HiddenGlobalOffsetZ)); if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), KernelArg::Global); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT, + Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, AS, Int8PtrT, KernelArg::HiddenPrintfBuffer)); } } @@ -422,6 +420,7 @@ std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, const Module &M) { + auto AS = AMDGPU::getAMDGPUAS(M); Program::Metadata Prog; Prog.MDVersionSeq.push_back(MDVersion); Prog.MDVersionSeq.push_back(MDRevision); @@ -442,7 +441,7 @@ for (auto &F: M.functions()) { if (!F.getMetadata("kernel_arg_type")) continue; - Prog.Kernels.emplace_back(getRuntimeMDForKernel(F)); + Prog.Kernels.emplace_back(getRuntimeMDForKernel(F, AS)); } auto YAML = Prog.toYAML(); Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -707,12 +707,12 @@ SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); const DataLayout &DL = DAG.getDataLayout(); const GlobalValue *GV = GSD->getGlobal(); - MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); @@ -1107,7 +1107,7 @@ //TODO: Who creates the i8 stores? assert(Store->isTruncatingStore() || Store->getValue().getValueType() == MVT::i8); - assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS); + assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS); SDValue Mask; if (Store->getMemoryVT() == MVT::i8) { @@ -1205,9 +1205,10 @@ SDLoc DL(Op); // Neither LOCAL nor PRIVATE can do vectors at the moment - if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { - if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + if ((AS == AMDGPUASI.PRIVATE_ADDRESS) && + StoreNode->isTruncatingStore()) { // Add an extra level of chain to isolate this vector SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); // TODO: can the chain be replaced without creating a new store? 
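(A note on the switch-to-if/else rewrites that recur throughout this patch, including in the store-lowering hunks just below: once most address-space numbers move from enumerators into run-time fields of AMDGPUAS, they can no longer appear as case labels. The following stand-alone C++ sketch is illustrative only; the struct and function names are hypothetical, not from the patch.)

    #include <cstdio>

    // Stand-in for the patch's AMDGPUAS struct: address-space numbers are
    // ordinary runtime values here, not compile-time constants.
    struct ASMap { unsigned GLOBAL_ADDRESS; unsigned LOCAL_ADDRESS; };

    static const char *classify(unsigned AS, const ASMap &M) {
      // switch (AS) { case M.GLOBAL_ADDRESS: ... }  // ill-formed: case
      // labels must be constant expressions, hence the if/else chains.
      if (AS == M.GLOBAL_ADDRESS)
        return "global";
      if (AS == M.LOCAL_ADDRESS)
        return "local";
      return "other";
    }

    int main() {
      ASMap M = {1, 3}; // the values used by the default (non-amdgiz) mapping
      std::printf("%s\n", classify(3, M)); // prints "local"
      return 0;
    }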
@@ -1283,7 +1284,7 @@ } // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes - if (AS != AMDGPUAS::PRIVATE_ADDRESS) + if (AS != AMDGPUASI.PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) @@ -1402,7 +1403,7 @@ EVT MemVT = LoadNode->getMemoryVT(); ISD::LoadExtType ExtType = LoadNode->getExtensionType(); - if (AS == AMDGPUAS::PRIVATE_ADDRESS && + if (AS == AMDGPUASI.PRIVATE_ADDRESS && ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { return lowerPrivateExtLoad(Op, DAG); } @@ -1412,8 +1413,8 @@ SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && + if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { return scalarizeVectorLoad(LoadNode, DAG); } @@ -1486,7 +1487,7 @@ return DAG.getMergeValues(MergedValues, DL); } - if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) { return SDValue(); } Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -326,8 +326,8 @@ class LoadVtxId1 : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && !isa(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout()))); }]>; @@ -339,7 +339,7 @@ class LoadVtxId2 : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && isa(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout())); }]>; Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -202,6 +202,7 @@ // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget(); + auto AMDGPUASI = ST.getAMDGPUAS(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -340,7 +341,7 @@ PointerType *PtrTy = PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), - AMDGPUAS::CONSTANT_ADDRESS); + AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); auto MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -599,8 +599,7 @@ if (AM.BaseGV) return false; - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // Assume the we will use FLAT for all global memory accesses // on VI. 
@@ -615,8 +614,7 @@ } return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::CONSTANT_ADDRESS: + } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? @@ -654,11 +652,10 @@ return false; - case AMDGPUAS::PRIVATE_ADDRESS: + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // Basic, single offset DS instructions allow a 16-bit unsigned immediate // field. // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have @@ -673,17 +670,15 @@ return true; return false; - - case AMDGPUAS::FLAT_ADDRESS: - case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: + } else if (AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) { // For an unknown address space, this usually means that this is for some // reason being used for pure arithmetic, and not based on some addressing // computation. We don't have instructions that compute pointers with any // addressing modes, so treat them as having no offset like flat // instructions. return isLegalFlatAddressingMode(AM); - - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -704,8 +699,8 @@ return false; } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || - AddrSpace == AMDGPUAS::REGION_ADDRESS) { + if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS || + AddrSpace == AMDGPUASI.REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. @@ -720,8 +715,8 @@ // will access scratch. If we had access to the IR function, then we // could determine if any private memory was used in the function. if (!Subtarget->hasUnalignedScratchAccess() && - (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || - AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { + (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS || + AddrSpace == AMDGPUASI.FLAT_ADDRESS)) { return false; } @@ -729,7 +724,7 @@ // If we have an uniform constant load, it still requires using a slow // buffer instruction if unaligned. if (IsFast) { - *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ? + *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ? (Align % 4 == 0) : true; } @@ -769,15 +764,16 @@ return MVT::Other; } -static bool isFlatGlobalAddrSpace(unsigned AS) { - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; +static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) { + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; } bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); + return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) && + isFlatGlobalAddrSpace(DestAS, AMDGPUASI); } bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { @@ -791,7 +787,7 @@ unsigned DestAS) const { // Flat -> private/local is a simple truncate. 
// Flat -> global is no-op - if (SrcAS == AMDGPUAS::FLAT_ADDRESS) + if (SrcAS == AMDGPUASI.FLAT_ADDRESS) return true; return isNoopAddrSpaceCast(SrcAS, DestAS); @@ -852,7 +848,7 @@ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, @@ -865,7 +861,7 @@ const ISD::InputArg *Arg) const { const DataLayout &DL = DAG.getDataLayout(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned Align = DL.getABITypeAlignment(Ty); @@ -2208,13 +2204,13 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && AMDGPU::shouldEmitConstantsToTextSection(TT); } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitFixup(GV) && !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); } @@ -2378,7 +2374,7 @@ // TODO: We should use the value from the IR intrinsic call, but it might not // be available and how do we get it? 
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()), - AMDGPUAS::CONSTANT_ADDRESS)); + AMDGPUASI.CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(V, StructOffset); return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo, @@ -2400,11 +2396,12 @@ auto &MFI = *DAG.getMachineFunction().getInfo(); // flat -> local/private - if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned DestAS = ASC->getDestAddressSpace(); MFI.HasFlatLocalCasts = DestAS == AMDGPUAS::LOCAL_ADDRESS; - if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) { + if (DestAS == AMDGPUASI.LOCAL_ADDRESS || + DestAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(DestAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE); @@ -2416,11 +2413,12 @@ } // local/private -> flat - if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned SrcAS = ASC->getSrcAddressSpace(); - MFI.HasFlatLocalCasts = SrcAS == AMDGPUAS::LOCAL_ADDRESS; + MFI.HasFlatLocalCasts = SrcAS == AMDGPUASI.LOCAL_ADDRESS; - if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { + if (SrcAS == AMDGPUASI.LOCAL_ADDRESS || + SrcAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(SrcAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); @@ -2520,8 +2518,8 @@ bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // We can fold offsets for anything that doesn't require a GOT relocation. - return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitGOTReloc(GA->getGlobal()); } @@ -2572,8 +2570,8 @@ SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && - GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); SDLoc DL(GSD); @@ -2590,7 +2588,7 @@ SIInstrInfo::MO_GOTPCREL32); Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); const DataLayout &DataLayout = DAG.getDataLayout(); unsigned Align = DataLayout.getABITypeAlignment(PtrTy); // FIXME: Use a PseudoSourceValue once those can be assigned an address space. @@ -3178,21 +3176,19 @@ SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? 
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = MemVT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::CONSTANT_ADDRESS: + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { if (isMemOpUniform(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::GLOBAL_ADDRESS: + } else if (AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); @@ -3200,13 +3196,12 @@ // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::FLAT_ADDRESS: + } else if (AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorLoad(Op, DAG); // v4 loads are supported for private and global memory. return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { // Depending on the setting of the private_element_size field in the // resource descriptor, we can only make private accesses up to a certain // size. @@ -3225,7 +3220,7 @@ default: llvm_unreachable("unsupported private_element_size"); } - case AMDGPUAS::LOCAL_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorLoad(Op, DAG); @@ -3234,9 +3229,8 @@ // If properly aligned, if we split we might be able to use ds_read_b64. return SplitVectorLoad(Op, DAG); - default: - return SDValue(); } + return SDValue(); } SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -3605,18 +3599,17 @@ SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? - AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = VT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorStore(Op, DAG); return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: { + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { case 4: return scalarizeVectorStore(Store, DAG); @@ -3631,8 +3624,7 @@ default: llvm_unreachable("unsupported private_element_size"); } - } - case AMDGPUAS::LOCAL_ADDRESS: { + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorStore(Op, DAG); @@ -3641,8 +3633,7 @@ // If properly aligned, if we split we might be able to use ds_write_b64. return SplitVectorStore(Op, DAG); - } - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -3673,7 +3664,7 @@ unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS)) + if (!isFlatGlobalAddrSpace(AS, AMDGPUASI)) return Op; // Non-local address space requires custom lowering for atomic compare @@ -3730,26 +3721,26 @@ /// the immediate offsets of a memory instruction for the given address space. 
 static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
                           const SISubtarget &STI) {
-  switch (AS) {
-  case AMDGPUAS::GLOBAL_ADDRESS:
+  auto AMDGPUASI = STI.getAMDGPUAS();
+  if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
     // MUBUF instructions a 12-bit offset in bytes.
     return isUInt<12>(OffsetSize);
-  case AMDGPUAS::CONSTANT_ADDRESS:
+  }
+  if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
     // SMRD instructions have an 8-bit offset in dwords on SI and
     // a 20-bit offset in bytes on VI.
     if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
       return isUInt<20>(OffsetSize);
     else
       return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
-  case AMDGPUAS::LOCAL_ADDRESS:
-  case AMDGPUAS::REGION_ADDRESS:
+  }
+  if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+      AS == AMDGPUASI.REGION_ADDRESS) {
     // The single offset versions have a 16-bit offset in bytes.
     return isUInt<16>(OffsetSize);
-  case AMDGPUAS::PRIVATE_ADDRESS:
-    // Indirect register addressing does not use any offsets.
-  default:
-    return false;
   }
+  // Indirect register addressing does not use any offsets.
+  return false;
 }
 
 // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
@@ -3807,7 +3798,7 @@
 
   // TODO: We could also do this for multiplies.
   unsigned AS = N->getAddressSpace();
-  if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+  if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) {
     SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
     if (NewPtr) {
       SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3741,7 +3741,7 @@
     return AMDGPU::NoRegister;
 
   assert(!MI.memoperands_empty() &&
-         (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+         (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
 
   FrameIndex = Addr->getIndex();
   return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -3854,7 +3854,7 @@
     return true;
 
   for (const MachineMemOperand *MMO : MI.memoperands()) {
-    if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+    if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
       return true;
   }
   return false;
Index: lib/Target/AMDGPU/SMInstructions.td
===================================================================
--- lib/Target/AMDGPU/SMInstructions.td
+++ lib/Target/AMDGPU/SMInstructions.td
@@ -226,7 +226,7 @@
 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
   auto Ld = cast<LoadSDNode>(N);
   return Ld->getAlignment() >= 4 &&
-    ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+    ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
     static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
     (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
     static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
 
+#include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "SIDefines.h"
 #include "llvm/ADT/StringRef.h"
@@ -160,9 +161,9 @@
 
 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
 
-bool isGroupSegment(const GlobalValue *GV);
-bool isGlobalSegment(const GlobalValue *GV);
-bool isReadOnlySegment(const GlobalValue *GV);
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
 
 /// \returns True if constants should be emitted to .text section for given
 /// target triple \p TT, false otherwise.
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -354,16 +355,16 @@
                            ELF::SHF_AMDGPU_HSA_AGENT);
 }
 
-bool isGroupSegment(const GlobalValue *GV) {
-  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
+  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
 }
 
-bool isGlobalSegment(const GlobalValue *GV) {
-  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
+  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
 }
 
-bool isReadOnlySegment(const GlobalValue *GV) {
-  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
+  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
 }
 
 bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -736,6 +737,60 @@
   return isSI(ST) || isCI(ST) ?
     isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
 }
-
 } // end namespace AMDGPU
+
 } // end namespace llvm
+
+const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
+const unsigned AMDGPUAS::GLOBAL_ADDRESS;
+const unsigned AMDGPUAS::LOCAL_ADDRESS;
+const unsigned AMDGPUAS::PARAM_D_ADDRESS;
+const unsigned AMDGPUAS::PARAM_I_ADDRESS;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
+const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
+namespace llvm {
+namespace AMDGPU {
+
+AMDGPUAS getAMDGPUAS(Triple T) {
+  auto Env = T.getEnvironment();
+  AMDGPUAS AS;
+  if (Env == Triple::AMDGIZ || Env == Triple::AMDGIZCL) {
+    AS.FLAT_ADDRESS = 0;
+    AS.CONSTANT_ADDRESS = 4;
+    AS.PRIVATE_ADDRESS = 5;
+    AS.REGION_ADDRESS = 2;
+  }
+  else {
+    AS.FLAT_ADDRESS = 4;
+    AS.CONSTANT_ADDRESS = 2;
+    AS.PRIVATE_ADDRESS = 0;
+    AS.REGION_ADDRESS = 5;
+  }
+  return AS;
+}
+
+AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
+  return getAMDGPUAS(M.getTargetTriple());
+}
+
+AMDGPUAS getAMDGPUAS(const Module &M) {
+  return getAMDGPUAS(Triple(M.getTargetTriple()));
+}
+} // namespace AMDGPU
+} // namespace llvm
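To close, a minimal sketch of how the new interface is meant to be consumed. This is not part of the patch: the triple spellings below are assumptions, and it presumes the Triple parser in this tree maps the "amdgiz"/"amdgizcl" environment names onto Triple::AMDGIZ/Triple::AMDGIZCL. The numeric values are the ones assigned in getAMDGPUAS above; the members left as static constants (GLOBAL_ADDRESS, LOCAL_ADDRESS, the CONSTANT_BUFFER_* values) keep the same numbers in both layouts, which is why spellings like AMDGPUAS::GLOBAL_ADDRESS remain valid in unchanged context lines throughout the patch.

    #include "AMDGPU.h"
    #include "llvm/ADT/Triple.h"
    using namespace llvm;

    // Default layout: private = 0, constant = 2, flat = 4, region = 5.
    // amdgiz layout:  flat = 0, region = 2, constant = 4, private = 5.
    static bool checkMappings() {
      AMDGPUAS Def = AMDGPU::getAMDGPUAS(Triple("amdgcn-amd-amdhsa"));
      AMDGPUAS Giz = AMDGPU::getAMDGPUAS(Triple("amdgcn-amd-amdhsa-amdgiz"));
      return Def.PRIVATE_ADDRESS == 0 && Def.FLAT_ADDRESS == 4 &&
             Giz.FLAT_ADDRESS == 0 && Giz.PRIVATE_ADDRESS == 5 &&
             AMDGPUAS::GLOBAL_ADDRESS == 1; // fixed across both layouts
    }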