Index: llvm/trunk/include/llvm/Support/TargetParser.h =================================================================== --- llvm/trunk/include/llvm/Support/TargetParser.h +++ llvm/trunk/include/llvm/Support/TargetParser.h @@ -320,6 +320,13 @@ GK_AMDGCN_LAST = GK_GFX906, }; +/// Instruction set architecture version. +struct IsaVersion { + unsigned Major; + unsigned Minor; + unsigned Stepping; +}; + // This isn't comprehensive for now, just things that are needed from the // frontend driver. enum ArchFeatureKind : uint32_t { @@ -335,18 +342,22 @@ FEATURE_FAST_DENORMAL_F32 = 1 << 5 }; -GPUKind parseArchAMDGCN(StringRef CPU); -GPUKind parseArchR600(StringRef CPU); StringRef getArchNameAMDGCN(GPUKind AK); StringRef getArchNameR600(GPUKind AK); StringRef getCanonicalArchName(StringRef Arch); +GPUKind parseArchAMDGCN(StringRef CPU); +GPUKind parseArchR600(StringRef CPU); unsigned getArchAttrAMDGCN(GPUKind AK); unsigned getArchAttrR600(GPUKind AK); void fillValidArchListAMDGCN(SmallVectorImpl &Values); void fillValidArchListR600(SmallVectorImpl &Values); -} +StringRef getArchNameFromElfMach(unsigned ElfMach); +unsigned getElfMach(StringRef GPU); +IsaVersion getIsaVersion(StringRef GPU); + +} // namespace AMDGPU } // namespace llvm Index: llvm/trunk/lib/Support/TargetParser.cpp =================================================================== --- llvm/trunk/lib/Support/TargetParser.cpp +++ llvm/trunk/lib/Support/TargetParser.cpp @@ -17,11 +17,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include using namespace llvm; using namespace ARM; using namespace AArch64; +using namespace AMDGPU; namespace { @@ -947,6 +949,8 @@ TT.isOSWindows(); } +namespace { + struct GPUInfo { StringLiteral Name; StringLiteral CanonicalName; @@ -954,11 +958,9 @@ unsigned Features; }; -using namespace AMDGPU; -static constexpr GPUInfo R600GPUs[26] = { - // Name Canonical Kind Features - // Name - // 
+constexpr GPUInfo R600GPUs[26] = { + // Name Canonical Kind Features + // Name {{"r600"}, {"r600"}, GK_R600, FEATURE_NONE }, {{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE }, {{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE }, @@ -989,9 +991,9 @@ // This table should be sorted by the value of GPUKind // Don't bother listing the implicitly true features -static constexpr GPUInfo AMDGCNGPUs[32] = { - // Name Canonical Kind Features - // Name +constexpr GPUInfo AMDGCNGPUs[32] = { + // Name Canonical Kind Features + // Name {{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, {{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32}, {{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE}, @@ -1026,8 +1028,7 @@ {{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32}, }; -static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, - ArrayRef Table) { +const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef Table) { GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE }; auto I = std::lower_bound(Table.begin(), Table.end(), Search, @@ -1040,6 +1041,8 @@ return I; } +} // namespace + StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) { if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs)) return Entry->CanonicalName; @@ -1092,3 +1095,119 @@ for (const auto C : R600GPUs) Values.push_back(C.Name); } + +StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) { + // Start from GK_NONE: the switch below has no default, so an unlisted ElfMach would otherwise leave AK uninitialized when it is read. + AMDGPU::GPUKind AK = GK_NONE; + + switch (ElfMach) { + case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; + case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; + case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; + case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; + case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; + case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; + case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; + case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; + case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; + case 
ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; + case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; + case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; + case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; + case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; + case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; + case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; + case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; + } + + StringRef GPUName = getArchNameAMDGCN(AK); + if (GPUName != "") + return GPUName; + return getArchNameR600(AK); +} + +unsigned AMDGPU::getElfMach(StringRef GPU) { + AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); + if (AK == AMDGPU::GPUKind::GK_NONE) + AK = parseArchR600(GPU); + + switch (AK) { + case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; + case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; + case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; + case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; + case GK_RV710: 
return ELF::EF_AMDGPU_MACH_R600_RV710; + case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; + case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; + case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; + case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; + case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; + case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; + case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; + case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; + case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; + case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; + case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; + case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; + case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; + case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; + case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; + case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; + case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; + case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; + case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; + case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; + case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; + case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; + case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; + case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; + case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; + case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; + case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; + } + + llvm_unreachable("unknown GPU"); +} + +AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { + if (GPU == "generic") + return {7, 0, 0}; + + AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); + if (AK == AMDGPU::GPUKind::GK_NONE) + return {0, 0, 0}; + + switch (AK) { + case GK_GFX600: return {6, 0, 0}; + case GK_GFX601: return {6, 0, 1}; + case GK_GFX700: return {7, 0, 0}; + 
case GK_GFX701: return {7, 0, 1}; + case GK_GFX702: return {7, 0, 2}; + case GK_GFX703: return {7, 0, 3}; + case GK_GFX704: return {7, 0, 4}; + case GK_GFX801: return {8, 0, 1}; + case GK_GFX802: return {8, 0, 2}; + case GK_GFX803: return {8, 0, 3}; + case GK_GFX810: return {8, 1, 0}; + case GK_GFX900: return {9, 0, 0}; + case GK_GFX902: return {9, 0, 2}; + case GK_GFX904: return {9, 0, 4}; + case GK_GFX906: return {9, 0, 6}; + default: return {0, 0, 0}; + } +} Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -40,6 +40,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -134,9 +135,9 @@ getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2. 
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); + IsaVersion Version = getIsaVersion(getSTI()->getCPU()); getTargetStreamer()->EmitDirectiveHSACodeObjectISA( - ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU"); } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { @@ -240,7 +241,7 @@ *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), CurrentProgramInfo.NumVGPRsForWavesPerEU, CurrentProgramInfo.NumSGPRsForWavesPerEU - - IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(), + IsaInfo::getNumExtraSGPRs(getSTI(), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed), CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, @@ -561,7 +562,7 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( const GCNSubtarget &ST) const { - return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), + return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST, UsesVCC, UsesFlatScratch); } @@ -758,7 +759,7 @@ // 48 SGPRs - vcc, - flat_scr, -xnack int MaxSGPRGuess = - 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true, + 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); @@ -823,7 +824,7 @@ // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be // unified. unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs( - STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); + getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed); // Check the addressable register limit before we add ExtraSGPRs. 
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && @@ -905,9 +906,9 @@ } ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks( - STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU); + getSTI(), ProgInfo.NumSGPRsForWavesPerEU); ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( - STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU); + getSTI(), ProgInfo.NumVGPRsForWavesPerEU); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" @@ -1137,7 +1138,7 @@ const SIMachineFunctionInfo *MFI = MF.getInfo(); const GCNSubtarget &STM = MF.getSubtarget(); - AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI()); Out.compute_pgm_resource_registers = CurrentProgramInfo.ComputePGMRSrc1 | Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -63,7 +63,6 @@ Triple TargetTriple; protected: - const FeatureBitset &SubtargetFeatureBits; bool Has16BitInsts; bool HasMadMixInsts; bool FP32Denormals; @@ -79,7 +78,7 @@ unsigned WavefrontSize; public: - AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); + AMDGPUSubtarget(const Triple &TT); static const AMDGPUSubtarget &get(const MachineFunction &MF); static const AMDGPUSubtarget &get(const TargetMachine &TM, @@ -203,33 +202,21 @@ /// \returns Maximum number of work groups per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, - FlatWorkGroupSize); - } + virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum flat work group size supported by the subtarget. 
- unsigned getMinFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); - } + virtual unsigned getMinFlatWorkGroupSize() const = 0; /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); - } + virtual unsigned getMaxFlatWorkGroupSize() const = 0; /// \returns Maximum number of waves per execution unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, - FlatWorkGroupSize); - } + virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0; /// \returns Minimum number of waves per execution unit supported by the /// subtarget. - unsigned getMinWavesPerEU() const { - return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); - } + virtual unsigned getMinWavesPerEU() const = 0; unsigned getMaxWavesPerEU() const { return 10; } @@ -708,20 +695,19 @@ /// \returns Number of execution units per compute unit supported by the /// subtarget. unsigned getEUsPerCU() const { - return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getEUsPerCU(this); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerCU() const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getMaxWavesPerCU(this); } /// \returns Maximum number of waves per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. 
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(), - FlatWorkGroupSize); + return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize); } /// \returns Maximum number of waves per execution unit supported by the @@ -733,8 +719,7 @@ /// \returns Number of waves per work group supported by the subtarget and /// limited by given \p FlatWorkGroupSize. unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return AMDGPU::IsaInfo::getWavesPerWorkGroup( - MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize); + return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize); } // static wrappers @@ -853,39 +838,34 @@ /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getSGPRAllocGranule(this); } /// \returns SGPR encoding granularity supported by the subtarget. unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getSGPREncodingGranule(this); } /// \returns Total number of SGPRs supported by the subtarget. unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumSGPRs(this); } /// \returns Addressable number of SGPRs supported by the subtarget. unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); } /// \returns Minimum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. 
unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); } /// \returns Maximum number of SGPRs that meets the given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU, Addressable); + return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); } /// \returns Reserved number of SGPRs for given function \p MF. @@ -903,39 +883,34 @@ /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getVGPRAllocGranule(this); } /// \returns VGPR encoding granularity supported by the subtarget. unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getVGPREncodingGranule(this); } /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getTotalNumVGPRs(this); } /// \returns Addressable number of VGPRs supported by the subtarget. unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs( - MCSubtargetInfo::getFeatureBits()); + return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); } /// \returns Minimum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. 
unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); } /// \returns Maximum number of VGPRs that meets given number of waves per /// execution unit requirement supported by the subtarget. unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), - WavesPerEU); + return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); } /// \returns Maximum number of VGPRs that meets number of waves per execution @@ -951,6 +926,34 @@ void getPostRAMutations( std::vector> &Mutations) const override; + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. 
+ unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } }; class R600Subtarget final : public R600GenSubtargetInfo, @@ -1061,6 +1064,34 @@ bool enableSubRegLiveness() const override { return true; } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Maximum number of waves per execution unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. 
+ unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } }; } // end namespace llvm Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -124,10 +124,8 @@ return *this; } -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, - const FeatureBitset &FeatureBits) : +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT), - SubtargetFeatureBits(FeatureBits), Has16BitInsts(false), HasMadMixInsts(false), FP32Denormals(false), @@ -144,9 +142,9 @@ { } GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const GCNTargetMachine &TM) : + const GCNTargetMachine &TM) : AMDGPUGenSubtargetInfo(TT, GPU, FS), - AMDGPUSubtarget(TT, getFeatureBits()), + AMDGPUSubtarget(TT), TargetTriple(TT), Gen(SOUTHERN_ISLANDS), IsaVersion(ISAVersion0_0_0), @@ -448,7 +446,7 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : R600GenSubtargetInfo(TT, GPU, FS), - AMDGPUSubtarget(TT, getFeatureBits()), + AMDGPUSubtarget(TT), InstrInfo(*this), FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), FMA(false), Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include @@ -917,8 +918,7 @@ // Currently there is none suitable machinery in the core llvm-mc for this. 
// MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); MCContext &Ctx = getContext(); if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { MCSymbol *Sym = @@ -1826,7 +1826,7 @@ unsigned DwordRegIndex, unsigned RegWidth) { // Symbols are only defined for GCN targets - if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6) + if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) return true; auto SymbolName = getGprCountSymbolName(RegKind); @@ -2637,18 +2637,18 @@ unsigned &SGPRBlocks) { // TODO(scott.linder): These calculations are duplicated from // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. - IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features); + IsaVersion Version = getIsaVersion(getSTI().getCPU()); unsigned NumVGPRs = NextFreeVGPR; unsigned NumSGPRs = NextFreeSGPR; - unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features); + unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && NumSGPRs > MaxAddressableNumSGPRs) return OutOfRangeError(SGPRRange); NumSGPRs += - IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed); + IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && NumSGPRs > MaxAddressableNumSGPRs) @@ -2657,8 +2657,8 @@ if (Features.test(FeatureSGPRInitBug)) NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; - VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs); - SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs); + VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); + SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); return false; } 
@@ -2678,8 +2678,7 @@ StringSet<> Seen; - IsaInfo::IsaVersion IVersion = - IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); SMRange VGPRRange; uint64_t NextFreeVGPR = 0; @@ -2938,8 +2937,7 @@ // If this directive has no arguments, then use the ISA version for the // targeted GPU. if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); @@ -3001,7 +2999,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment @@ -3679,12 +3677,12 @@ static bool encodeCnt( - const AMDGPU::IsaInfo::IsaVersion ISA, + const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, - unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), - unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) + unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaVersion &Version, unsigned)) { bool Failed = false; @@ -3715,8 +3713,7 @@ if (getParser().parseAbsoluteExpression(CntVal)) return true; - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); bool Failed = true; bool Sat = CntName.endswith("_sat"); @@ -3751,8 +3748,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); int64_t Waitcnt 
= getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -1155,8 +1155,7 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits()); + AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU()); unsigned SImm16 = MI->getOperand(OpNo).getImm(); unsigned Vmcnt, Expcnt, Lgkmcnt; Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -31,13 +31,7 @@ protected: MCContext &getContext() const { return Streamer.getContext(); } - /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name. - unsigned getMACH(StringRef GPU) const; - public: - /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value. 
- static const char *getMachName(unsigned Mach); - AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0; Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetParser.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -39,84 +40,6 @@ // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// -static const struct { - const char *Name; - unsigned Mach; -} MachTable[] = { - // Radeon HD 2000/3000 Series (R600). - { "r600", ELF::EF_AMDGPU_MACH_R600_R600 }, - { "r630", ELF::EF_AMDGPU_MACH_R600_R630 }, - { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 }, - { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 }, - // Radeon HD 4000 Series (R700). - { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 }, - { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 }, - { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 }, - // Radeon HD 5000 Series (Evergreen). - { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR }, - { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS }, - { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER }, - { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD }, - { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO }, - // Radeon HD 6000 Series (Northern Islands). - { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS }, - { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS }, - { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN }, - { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS }, - // AMDGCN GFX6. 
- { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 }, - { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 }, - { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 }, - // AMDGCN GFX7. - { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 }, - { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 }, - { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 }, - { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 }, - { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 }, - { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, - { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, - { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 }, - { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 }, - { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 }, - // AMDGCN GFX8. - { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 }, - { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 }, - { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, - { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, - { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 }, - { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 }, - { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 }, - { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 }, - // AMDGCN GFX9. - { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 }, - { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 }, - { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 }, - { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 }, - // Not specified processor. 
- { nullptr, ELF::EF_AMDGPU_MACH_NONE } -}; - -unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const { - auto Entry = MachTable; - for (; Entry->Name && GPU != Entry->Name; ++Entry) - ; - return Entry->Mach; -} - -const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) { - auto Entry = MachTable; - for (; Entry->Name && Mach != Entry->Mach; ++Entry) - ; - return Entry->Name; -} - bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { HSAMD::Metadata HSAMetadata; if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) @@ -205,7 +128,7 @@ bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) { amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor(); - IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits()); + IsaVersion IVersion = getIsaVersion(STI.getCPU()); OS << "\t.amdhsa_kernel " << KernelName << '\n'; @@ -342,7 +265,7 @@ unsigned EFlags = MCA.getELFHeaderEFlags(); EFlags &= ~ELF::EF_AMDGPU_MACH; - EFlags |= getMACH(STI.getCPU()); + EFlags |= getElfMach(STI.getCPU()); EFlags &= ~ELF::EF_AMDGPU_XNACK; if (AMDGPU::hasXNACK(STI)) Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -369,7 +369,7 @@ const SIRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const MachineLoopInfo *MLI = nullptr; - AMDGPU::IsaInfo::IsaVersion IV; + AMDGPU::IsaVersion IV; DenseSet BlockVisitedSet; DenseSet TrackedWaitcntSet; @@ -1841,7 +1841,7 @@ TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); MLI = &getAnalysis(); - IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); + IV = AMDGPU::getIsaVersion(ST->getCPU()); const SIMachineFunctionInfo *MFI = MF.getInfo(); ForceEmitZeroWaitcnts = ForceEmitZeroFlag; Index: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 
=================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -253,7 +253,7 @@ /// Instruction info. const SIInstrInfo *TII = nullptr; - IsaInfo::IsaVersion IV; + IsaVersion IV; SICacheControl(const GCNSubtarget &ST); @@ -605,7 +605,7 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) { TII = ST.getInstrInfo(); - IV = IsaInfo::getIsaVersion(ST.getFeatureBits()); + IV = getIsaVersion(ST.getCPU()); } /* static */ Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -19,6 +19,7 @@ #include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetParser.h" #include #include #include @@ -56,16 +57,6 @@ TRAP_NUM_SGPRS = 16 }; -/// Instruction set architecture version. -struct IsaVersion { - unsigned Major; - unsigned Minor; - unsigned Stepping; -}; - -/// \returns Isa version for given subtarget \p Features. -IsaVersion getIsaVersion(const FeatureBitset &Features); - /// Streams isa version string for given subtarget \p STI into \p Stream. void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); @@ -73,114 +64,114 @@ /// false otherwise. bool hasCodeObjectV3(const MCSubtargetInfo *STI); -/// \returns Wavefront size for given subtarget \p Features. -unsigned getWavefrontSize(const FeatureBitset &Features); +/// \returns Wavefront size for given subtarget \p STI. +unsigned getWavefrontSize(const MCSubtargetInfo *STI); -/// \returns Local memory size in bytes for given subtarget \p Features. -unsigned getLocalMemorySize(const FeatureBitset &Features); +/// \returns Local memory size in bytes for given subtarget \p STI. 
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI); /// \returns Number of execution units per compute unit for given subtarget \p -/// Features. -unsigned getEUsPerCU(const FeatureBitset &Features); +/// STI. +unsigned getEUsPerCU(const MCSubtargetInfo *STI); /// \returns Maximum number of work groups per compute unit for given subtarget -/// \p Features and limited by given \p FlatWorkGroupSize. -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, +/// \p STI and limited by given \p FlatWorkGroupSize. +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); /// \returns Maximum number of waves per compute unit for given subtarget \p -/// Features without any kind of limitation. -unsigned getMaxWavesPerCU(const FeatureBitset &Features); +/// STI without any kind of limitation. +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per compute unit for given subtarget \p -/// Features and limited by given \p FlatWorkGroupSize. -unsigned getMaxWavesPerCU(const FeatureBitset &Features, +/// STI and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); /// \returns Minimum number of waves per execution unit for given subtarget \p -/// Features. -unsigned getMinWavesPerEU(const FeatureBitset &Features); +/// STI. +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p -/// Features without any kind of limitation. +/// STI without any kind of limitation. unsigned getMaxWavesPerEU(); /// \returns Maximum number of waves per execution unit for given subtarget \p -/// Features and limited by given \p FlatWorkGroupSize. -unsigned getMaxWavesPerEU(const FeatureBitset &Features, +/// STI and limited by given \p FlatWorkGroupSize. 
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); -/// \returns Minimum flat work group size for given subtarget \p Features. -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features); +/// \returns Minimum flat work group size for given subtarget \p STI. +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI); -/// \returns Maximum flat work group size for given subtarget \p Features. -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features); +/// \returns Maximum flat work group size for given subtarget \p STI. +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI); -/// \returns Number of waves per work group for given subtarget \p Features and +/// \returns Number of waves per work group for given subtarget \p STI and /// limited by given \p FlatWorkGroupSize. -unsigned getWavesPerWorkGroup(const FeatureBitset &Features, +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize); -/// \returns SGPR allocation granularity for given subtarget \p Features. -unsigned getSGPRAllocGranule(const FeatureBitset &Features); +/// \returns SGPR allocation granularity for given subtarget \p STI. +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI); -/// \returns SGPR encoding granularity for given subtarget \p Features. -unsigned getSGPREncodingGranule(const FeatureBitset &Features); +/// \returns SGPR encoding granularity for given subtarget \p STI. +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI); -/// \returns Total number of SGPRs for given subtarget \p Features. -unsigned getTotalNumSGPRs(const FeatureBitset &Features); +/// \returns Total number of SGPRs for given subtarget \p STI. +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI); -/// \returns Addressable number of SGPRs for given subtarget \p Features. -unsigned getAddressableNumSGPRs(const FeatureBitset &Features); +/// \returns Addressable number of SGPRs for given subtarget \p STI. 
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI); /// \returns Minimum number of SGPRs that meets the given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p STI. +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); /// \returns Maximum number of SGPRs that meets the given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, +/// execution unit requirement for given subtarget \p STI. +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable); /// \returns Number of extra SGPRs implicitly required by given subtarget \p -/// Features when the given special registers are used. -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +/// STI when the given special registers are used. +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed); /// \returns Number of extra SGPRs implicitly required by given subtarget \p -/// Features when the given special registers are used. XNACK is inferred from -/// \p Features. -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +/// STI when the given special registers are used. XNACK is inferred from +/// \p STI. +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed); -/// \returns Number of SGPR blocks needed for given subtarget \p Features when +/// \returns Number of SGPR blocks needed for given subtarget \p STI when /// \p NumSGPRs are used. \p NumSGPRs should already include any special /// register counts. 
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); -/// \returns VGPR allocation granularity for given subtarget \p Features. -unsigned getVGPRAllocGranule(const FeatureBitset &Features); +/// \returns VGPR allocation granularity for given subtarget \p STI. +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI); -/// \returns VGPR encoding granularity for given subtarget \p Features. -unsigned getVGPREncodingGranule(const FeatureBitset &Features); +/// \returns VGPR encoding granularity for given subtarget \p STI. +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI); -/// \returns Total number of VGPRs for given subtarget \p Features. -unsigned getTotalNumVGPRs(const FeatureBitset &Features); +/// \returns Total number of VGPRs for given subtarget \p STI. +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); -/// \returns Addressable number of VGPRs for given subtarget \p Features. -unsigned getAddressableNumVGPRs(const FeatureBitset &Features); +/// \returns Addressable number of VGPRs for given subtarget \p STI. +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); /// \returns Minimum number of VGPRs that meets given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p STI. +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); /// \returns Maximum number of VGPRs that meets given number of waves per -/// execution unit requirement for given subtarget \p Features. -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); +/// execution unit requirement for given subtarget \p STI. 
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); -/// \returns Number of VGPR blocks needed for given subtarget \p Features when +/// \returns Number of VGPR blocks needed for given subtarget \p STI when /// \p NumVGPRs are used. -unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs); +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs); } // end namespace IsaInfo @@ -233,7 +224,7 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const FeatureBitset &Features); + const MCSubtargetInfo *STI); amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(); @@ -268,25 +259,25 @@ bool OnlyFirstRequired = false); /// \returns Vmcnt bit mask for given isa \p Version. -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getVmcntBitMask(const IsaVersion &Version); /// \returns Expcnt bit mask for given isa \p Version. -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getExpcntBitMask(const IsaVersion &Version); /// \returns Lgkmcnt bit mask for given isa \p Version. -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getLgkmcntBitMask(const IsaVersion &Version); /// \returns Waitcnt bit mask for given isa \p Version. -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version); +unsigned getWaitcntBitMask(const IsaVersion &Version); /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and @@ -297,19 +288,19 @@ /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) /// \p Expcnt = \p Waitcnt[6:4] /// \p Lgkmcnt = \p Waitcnt[11:8] -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt); /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt); /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt); /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa @@ -324,7 +315,7 @@ /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. 
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, +unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F); Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -137,68 +137,18 @@ namespace IsaInfo { -IsaVersion getIsaVersion(const FeatureBitset &Features) { - // GCN GFX6 (Southern Islands (SI)). - if (Features.test(FeatureISAVersion6_0_0)) - return {6, 0, 0}; - if (Features.test(FeatureISAVersion6_0_1)) - return {6, 0, 1}; - - // GCN GFX7 (Sea Islands (CI)). - if (Features.test(FeatureISAVersion7_0_0)) - return {7, 0, 0}; - if (Features.test(FeatureISAVersion7_0_1)) - return {7, 0, 1}; - if (Features.test(FeatureISAVersion7_0_2)) - return {7, 0, 2}; - if (Features.test(FeatureISAVersion7_0_3)) - return {7, 0, 3}; - if (Features.test(FeatureISAVersion7_0_4)) - return {7, 0, 4}; - if (Features.test(FeatureSeaIslands)) - return {7, 0, 0}; - - // GCN GFX8 (Volcanic Islands (VI)). - if (Features.test(FeatureISAVersion8_0_1)) - return {8, 0, 1}; - if (Features.test(FeatureISAVersion8_0_2)) - return {8, 0, 2}; - if (Features.test(FeatureISAVersion8_0_3)) - return {8, 0, 3}; - if (Features.test(FeatureISAVersion8_1_0)) - return {8, 1, 0}; - if (Features.test(FeatureVolcanicIslands)) - return {8, 0, 0}; - - // GCN GFX9. 
- if (Features.test(FeatureISAVersion9_0_0)) - return {9, 0, 0}; - if (Features.test(FeatureISAVersion9_0_2)) - return {9, 0, 2}; - if (Features.test(FeatureISAVersion9_0_4)) - return {9, 0, 4}; - if (Features.test(FeatureISAVersion9_0_6)) - return {9, 0, 6}; - if (Features.test(FeatureGFX9)) - return {9, 0, 0}; - - if (Features.test(FeatureSouthernIslands)) - return {0, 0, 0}; - return {7, 0, 0}; -} - void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { auto TargetTriple = STI->getTargetTriple(); - auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits()); + auto Version = getIsaVersion(STI->getCPU()); Stream << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName() << '-' << TargetTriple.getOSName() << '-' << TargetTriple.getEnvironmentName() << '-' << "gfx" - << ISAVersion.Major - << ISAVersion.Minor - << ISAVersion.Stepping; + << Version.Major + << Version.Minor + << Version.Stepping; if (hasXNACK(*STI)) Stream << "+xnack"; @@ -210,49 +160,49 @@ return STI->getFeatureBits().test(FeatureCodeObjectV3); } -unsigned getWavefrontSize(const FeatureBitset &Features) { - if (Features.test(FeatureWavefrontSize16)) +unsigned getWavefrontSize(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureWavefrontSize16)) return 16; - if (Features.test(FeatureWavefrontSize32)) + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) return 32; return 64; } -unsigned getLocalMemorySize(const FeatureBitset &Features) { - if (Features.test(FeatureLocalMemorySize32768)) +unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) return 32768; - if (Features.test(FeatureLocalMemorySize65536)) + if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) return 65536; return 0; } -unsigned getEUsPerCU(const FeatureBitset &Features) { +unsigned getEUsPerCU(const MCSubtargetInfo *STI) { return 4; } -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, +unsigned 
getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - if (!Features.test(FeatureGCN)) + if (!STI->getFeatureBits().test(FeatureGCN)) return 8; - unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); + unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); if (N == 1) return 40; N = 40 / N; return std::min(N, 16u); } -unsigned getMaxWavesPerCU(const FeatureBitset &Features) { - return getMaxWavesPerEU() * getEUsPerCU(Features); +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { + return getMaxWavesPerEU() * getEUsPerCU(STI); } -unsigned getMaxWavesPerCU(const FeatureBitset &Features, +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return getWavesPerWorkGroup(Features, FlatWorkGroupSize); + return getWavesPerWorkGroup(STI, FlatWorkGroupSize); } -unsigned getMinWavesPerEU(const FeatureBitset &Features) { +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } @@ -261,89 +211,89 @@ return 10; } -unsigned getMaxWavesPerEU(const FeatureBitset &Features, +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), - getEUsPerCU(Features)) / getEUsPerCU(Features); + return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize), + getEUsPerCU(STI)) / getEUsPerCU(STI); } -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; } -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 2048; } -unsigned getWavesPerWorkGroup(const FeatureBitset &Features, +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / - getWavefrontSize(Features); + return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) / + getWavefrontSize(STI); } -unsigned 
getSGPRAllocGranule(const FeatureBitset &Features) { - IsaVersion Version = getIsaVersion(Features); +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 16; return 8; } -unsigned getSGPREncodingGranule(const FeatureBitset &Features) { +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; } -unsigned getTotalNumSGPRs(const FeatureBitset &Features) { - IsaVersion Version = getIsaVersion(Features); +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 800; return 512; } -unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { - if (Features.test(FeatureSGPRInitBug)) +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureSGPRInitBug)) return FIXED_NUM_SGPRS_FOR_INIT_BUG; - IsaVersion Version = getIsaVersion(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 102; return 104; } -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; - unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); - if (Features.test(FeatureTrapHandler)) + unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); + if (STI->getFeatureBits().test(FeatureTrapHandler)) MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1; - return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); + MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; + return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); } -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, +unsigned getMaxNumSGPRs(const 
MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable) { assert(WavesPerEU != 0); - IsaVersion Version = getIsaVersion(Features); - unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); + unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); if (Version.Major >= 8 && !Addressable) AddressableNumSGPRs = 112; - unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU; - if (Features.test(FeatureTrapHandler)) + unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; + if (STI->getFeatureBits().test(FeatureTrapHandler)) MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features)); + MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); return std::min(MaxNumSGPRs, AddressableNumSGPRs); } -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed) { unsigned ExtraSGPRs = 0; if (VCCUsed) ExtraSGPRs = 2; - IsaVersion Version = getIsaVersion(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major < 8) { if (FlatScrUsed) ExtraSGPRs = 4; @@ -358,74 +308,74 @@ return ExtraSGPRs; } -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed) { - return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, - Features[AMDGPU::FeatureXNACK]); + return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, + STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); } -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features)); +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { + NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); // SGPRBlocks is actual number of 
SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(Features) - 1; + return NumSGPRs / getSGPREncodingGranule(STI) - 1; } -unsigned getVGPRAllocGranule(const FeatureBitset &Features) { +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) { return 4; } -unsigned getVGPREncodingGranule(const FeatureBitset &Features) { - return getVGPRAllocGranule(Features); +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) { + return getVGPRAllocGranule(STI); } -unsigned getTotalNumVGPRs(const FeatureBitset &Features) { +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { return 256; } -unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { - return getTotalNumVGPRs(Features); +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { + return getTotalNumVGPRs(STI); } -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumVGPRs = - alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), - getVGPRAllocGranule(Features)) + 1; - return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); + alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), + getVGPRAllocGranule(STI)) + 1; + return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI)); } -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); - unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, - getVGPRAllocGranule(Features)); - unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); + unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, + getVGPRAllocGranule(STI)); + unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const FeatureBitset &Features, 
unsigned NumVGPRs) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features)); +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) { + NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI)); // VGPRBlocks is actual number of VGPR blocks minus 1. - return NumVGPRs / getVGPREncodingGranule(Features) - 1; + return NumVGPRs / getVGPREncodingGranule(STI) - 1; } } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const FeatureBitset &Features) { - IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); + const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); memset(&Header, 0, sizeof(Header)); Header.amd_kernel_code_version_major = 1; Header.amd_kernel_code_version_minor = 2; Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU - Header.amd_machine_version_major = ISA.Major; - Header.amd_machine_version_minor = ISA.Minor; - Header.amd_machine_version_stepping = ISA.Stepping; + Header.amd_machine_version_major = Version.Major; + Header.amd_machine_version_minor = Version.Minor; + Header.amd_machine_version_stepping = Version.Stepping; Header.kernel_code_entry_byte_offset = sizeof(Header); // wavefront_size is specified as a power of 2: 2^6 = 64 threads. 
Header.wavefront_size = 6; @@ -513,7 +463,7 @@ return Ints; } -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getVmcntBitMask(const IsaVersion &Version) { unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; if (Version.Major < 9) return VmcntLo; @@ -522,15 +472,15 @@ return VmcntLo | VmcntHi; } -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getExpcntBitMask(const IsaVersion &Version) { return (1 << getExpcntBitWidth()) - 1; } -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getLgkmcntBitMask(const IsaVersion &Version) { return (1 << getLgkmcntBitWidth()) - 1; } -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getWaitcntBitMask(const IsaVersion &Version) { unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); @@ -542,7 +492,7 @@ return Waitcnt | VmcntHi; } -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); if (Version.Major < 9) @@ -554,22 +504,22 @@ return VmcntLo | VmcntHi; } -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { Vmcnt = 
decodeVmcnt(Version, Waitcnt); Expcnt = decodeExpcnt(Version, Waitcnt); Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt) { Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); @@ -580,17 +530,17 @@ return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); } -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt) { return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt) { return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, +unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); Index: llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll +++ llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s + +; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { + store float 0.0, float addrspace(1)* %out0 + ret void +} + Index: llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s +++ 
llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s @@ -2,5 +2,5 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s .hsa_code_object_isa -// GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU" +// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"