Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -114,12 +114,13 @@ TS->EmitDirectiveHSACodeObjectVersion(2, 1); - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits()); TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); // Emit runtime metadata. - TS->EmitRuntimeMetadata(M); + TS->EmitRuntimeMetadata(STI->getFeatureBits(), M); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -346,6 +347,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFunction &MF) const { const SISubtarget &STM = MF.getSubtarget(); + const FeatureBitset &STF = STM.getFeatureBits(); const SIMachineFunctionInfo *MFI = MF.getInfo(); uint64_t CodeSize = 0; unsigned MaxSGPR = 0; @@ -463,6 +465,7 @@ } unsigned ExtraSGPRs = 0; + unsigned ExtraVGPRs = 0; if (VCCUsed) ExtraSGPRs = 2; @@ -481,8 +484,9 @@ // Record first reserved register and reserved register count fields, and // update max register counts if "amdgpu-debugger-reserve-regs" attribute was // requested. - ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; - ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM); + ExtraVGPRs = RI->getReservedNumVGPRs(MF); + ProgInfo.ReservedVGPRFirst = ExtraVGPRs > 0 ? MaxVGPR + 1 : 0; + ProgInfo.ReservedVGPRCount = ExtraVGPRs; // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" @@ -497,14 +501,16 @@ // Check the addressable register limit before we add ExtraSGPRs. 
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && !STM.hasSGPRInitBug()) { - unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs(); + unsigned MaxAddressableNumSGPRs = + AMDGPU::IsaInfo::getAddressableNumSGPRs(STF); if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm. LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "addressable scalar registers", MaxSGPR + 1, DS_Error, - DK_ResourceLimit, MaxAddressableNumSGPRs); + DK_ResourceLimit, + MaxAddressableNumSGPRs); Ctx.diagnose(Diag); MaxSGPR = MaxAddressableNumSGPRs - 1; } @@ -512,7 +518,7 @@ // Account for extra SGPRs and VGPRs reserved for debugger use. MaxSGPR += ExtraSGPRs; - MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM); + MaxVGPR += ExtraVGPRs; // We found the maximum register index. They start at 0, so add one to get the // number of registers. @@ -522,31 +528,36 @@ // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. ProgInfo.NumSGPRsForWavesPerEU = std::max( - ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU())); + ProgInfo.NumSGPR, + AMDGPU::IsaInfo::getMinNumSGPRs(STF, MFI->getMaxWavesPerEU())); ProgInfo.NumVGPRsForWavesPerEU = std::max( - ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU())); + ProgInfo.NumVGPR, + AMDGPU::IsaInfo::getMinNumVGPRs(STF, MFI->getMaxWavesPerEU())); if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || STM.hasSGPRInitBug()) { - unsigned MaxNumSGPRs = STM.getMaxNumSGPRs(); - if (ProgInfo.NumSGPR > MaxNumSGPRs) { - // This can happen due to a compiler bug or when using inline asm to use the - // registers which are usually reserved for vcc etc. 
- + unsigned MaxAddressableNumSGPRs = + AMDGPU::IsaInfo::getAddressableNumSGPRs(STF); + if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) { + // This can happen due to a compiler bug or when using inline asm to use + // the registers which are usually reserved for vcc etc. LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "scalar registers", ProgInfo.NumSGPR, DS_Error, - DK_ResourceLimit, MaxNumSGPRs); + DK_ResourceLimit, + MaxAddressableNumSGPRs); Ctx.diagnose(Diag); - ProgInfo.NumSGPR = MaxNumSGPRs; - ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs; + ProgInfo.NumSGPR = MaxAddressableNumSGPRs; + ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs; } } if (STM.hasSGPRInitBug()) { - ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + ProgInfo.NumSGPR = + AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; + ProgInfo.NumSGPRsForWavesPerEU = + AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { @@ -564,14 +575,16 @@ } // SGPRBlocks is actual number of SGPR blocks minus 1. + unsigned SGPRAllocGranule = AMDGPU::IsaInfo::getSGPRAllocGranule(STF); ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU, - RI->getSGPRAllocGranule()); - ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1; + SGPRAllocGranule); + ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / SGPRAllocGranule - 1; // VGPRBlocks is actual number of VGPR blocks minus 1. + unsigned VGPRAllocGranule = AMDGPU::IsaInfo::getVGPRAllocGranule(STF); ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU, - RI->getVGPRAllocGranule()); - ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1; + VGPRAllocGranule); + ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / VGPRAllocGranule - 1; // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. 
Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -216,7 +216,8 @@ OccupancyHint = 7; // Clamp to max value. - OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU()); + OccupancyHint = std::min( + OccupancyHint, AMDGPU::IsaInfo::getMaxWavesPerEU(ST.getFeatureBits())); // Check the hint but ignore it if it's obviously wrong from the existing LDS // usage. Index: lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -41,37 +41,90 @@ // Version and revision of runtime metadata const unsigned char MDVersion = 2; - const unsigned char MDRevision = 0; + const unsigned char MDRevision = 1; // Name of keys for runtime metadata. namespace KeyName { - const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version - const char Language[] = "amd.Language"; // Language - const char LanguageVersion[] = "amd.LanguageVersion"; // Language version - const char Kernels[] = "amd.Kernels"; // Kernels - const char KernelName[] = "amd.KernelName"; // Kernel name - const char Args[] = "amd.Args"; // Kernel arguments - const char ArgSize[] = "amd.ArgSize"; // Kernel arg size - const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment - const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel type name - const char ArgName[] = "amd.ArgName"; // Kernel name - const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind - const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type - const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier - const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier - const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified - const char 
ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified - const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified - const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified - const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size - const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint - const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint - const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue - const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups - const char PrintfInfo[] = "amd.PrintfInfo"; // Prinf function call information - const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier - const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type + // Runtime metadata version + const char MDVersion[] = "amd.MDVersion"; + + // Instruction set architecture information + const char IsaInfo[] = "amd.IsaInfo"; + // Wavefront size + const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize"; + // Local memory size in bytes + const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize"; + // Number of execution units per compute unit + const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU"; + // Maximum number of waves per execution unit + const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU"; + // Maximum flat work group size + const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize"; + // SGPR allocation granularity + const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule"; + // Total number of SGPRs + const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs"; + // Addressable number of SGPRs + const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs"; + // VGPR allocation granularity + const char IsaInfoVGPRAllocGranule[] = 
"amd.IsaInfoVGPRAllocGranule"; + // Total number of VGPRs + const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs"; + // Addressable number of VGPRs + const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs"; + + // Language + const char Language[] = "amd.Language"; + // Language version + const char LanguageVersion[] = "amd.LanguageVersion"; + + // Kernels + const char Kernels[] = "amd.Kernels"; + // Kernel name + const char KernelName[] = "amd.KernelName"; + // Kernel arguments + const char Args[] = "amd.Args"; + // Kernel argument size in bytes + const char ArgSize[] = "amd.ArgSize"; + // Kernel argument alignment + const char ArgAlign[] = "amd.ArgAlign"; + // Kernel argument type name + const char ArgTypeName[] = "amd.ArgTypeName"; + // Kernel argument name + const char ArgName[] = "amd.ArgName"; + // Kernel argument kind + const char ArgKind[] = "amd.ArgKind"; + // Kernel argument value type + const char ArgValueType[] = "amd.ArgValueType"; + // Kernel argument address qualifier + const char ArgAddrQual[] = "amd.ArgAddrQual"; + // Kernel argument access qualifier + const char ArgAccQual[] = "amd.ArgAccQual"; + // Kernel argument is const qualified + const char ArgIsConst[] = "amd.ArgIsConst"; + // Kernel argument is restrict qualified + const char ArgIsRestrict[] = "amd.ArgIsRestrict"; + // Kernel argument is volatile qualified + const char ArgIsVolatile[] = "amd.ArgIsVolatile"; + // Kernel argument is pipe qualified + const char ArgIsPipe[] = "amd.ArgIsPipe"; + // Required work group size + const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; + // Work group size hint + const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; + // Vector type hint + const char VecTypeHint[] = "amd.VecTypeHint"; + // Kernel index for device enqueue + const char KernelIndex[] = "amd.KernelIndex"; + // No partial work groups + const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; + // Printf function call information + const char
PrintfInfo[] = "amd.PrintfInfo"; + // The actual kernel argument access qualifier + const char ArgActualAcc[] = "amd.ArgActualAcc"; + // Alignment of pointee type + const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; } // end namespace KeyName @@ -175,11 +228,45 @@ } // end namespace Kernel + namespace IsaInfo { + + /// \brief In-memory representation of instruction set architecture + /// information. + struct Metadata { + /// \brief Wavefront size. + unsigned WavefrontSize = 0; + /// \brief Local memory size in bytes. + unsigned LocalMemorySize = 0; + /// \brief Number of execution units per compute unit. + unsigned EUsPerCU = 0; + /// \brief Maximum number of waves per execution unit. + unsigned MaxWavesPerEU = 0; + /// \brief Maximum flat work group size. + unsigned MaxFlatWorkGroupSize = 0; + /// \brief SGPR allocation granularity. + unsigned SGPRAllocGranule = 0; + /// \brief Total number of SGPRs. + unsigned TotalNumSGPRs = 0; + /// \brief Addressable number of SGPRs. + unsigned AddressableNumSGPRs = 0; + /// \brief VGPR allocation granularity. + unsigned VGPRAllocGranule = 0; + /// \brief Total number of VGPRs. + unsigned TotalNumVGPRs = 0; + /// \brief Addressable number of VGPRs. + unsigned AddressableNumVGPRs = 0; + + Metadata() = default; + }; + + } // end namespace IsaInfo + namespace Program { // In-memory representation of program information. struct Metadata { std::vector MDVersionSeq; + IsaInfo::Metadata IsaInfo; std::vector PrintfInfo; std::vector Kernels; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -357,69 +357,6 @@ return true; } - /// \returns Number of execution units per compute unit supported by the - /// subtarget. 
- unsigned getEUsPerCU() const { - return 4; - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given flat work group size. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { - if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 8; - return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16; - } - - /// \returns Maximum number of waves per compute unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerCU() const { - return getMaxWavesPerEU() * getEUsPerCU(); - } - - /// \returns Maximum number of waves per compute unit supported by the - /// subtarget and limited by given flat work group size. - unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { - return getWavesPerWorkGroup(FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const { - return 1; - } - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerEU() const { - if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 8; - // FIXME: Need to take scratch memory into account. - return 10; - } - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget and limited by given flat work group size. - unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { - return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) / - getEUsPerCU(); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const { - return 1; - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const { - return 2048; - } - - /// \returns Number of waves per work group given the flat work group size. 
- unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { - return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize(); - } - void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} @@ -484,13 +421,6 @@ }; class SISubtarget final : public AMDGPUSubtarget { -public: - enum { - // The closed Vulkan driver sets 96, which limits the wave count to 8 but - // doesn't spill SGPRs as much as when 80 is set. - FIXED_SGPR_COUNT_FOR_INIT_BUG = 96 - }; - private: SIInstrInfo InstrInfo; SIFrameLowering FrameLowering; @@ -622,8 +552,6 @@ bool needWaitcntBeforeBarrier() const { return true; } - - unsigned getMaxNumSGPRs() const; }; } // end namespace llvm Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -208,9 +208,11 @@ return Default; // Make sure requested values do not violate subtarget's specifications. - if (Requested.first < getMinFlatWorkGroupSize()) + if (Requested.first < + AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits())) return Default; - if (Requested.second > getMaxFlatWorkGroupSize()) + if (Requested.second > + AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits())) return Default; return Requested; @@ -228,8 +230,8 @@ // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum // number of waves per execution unit to values implied by requested // minimum/maximum flat work group sizes. 
- unsigned MinImpliedByFlatWorkGroupSize = - getMaxWavesPerEU(FlatWorkGroupSizes.second); + unsigned MinImpliedByFlatWorkGroupSize = AMDGPU::IsaInfo::getMaxWavesPerEU( + getFeatureBits(), FlatWorkGroupSizes.second); bool RequestedFlatWorkGroupSize = false; // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa @@ -249,10 +251,10 @@ return Default; // Make sure requested values do not violate subtarget's specifications. - if (Requested.first < getMinWavesPerEU() || - Requested.first > getMaxWavesPerEU()) + if (Requested.first < AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits()) || + Requested.first > AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits())) return Default; - if (Requested.second > getMaxWavesPerEU()) + if (Requested.second > AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits())) return Default; // Make sure requested values are compatible with values implied by requested @@ -300,7 +302,7 @@ } unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF, - unsigned ExplicitArgBytes) const { + unsigned ExplicitArgBytes) const { unsigned ImplicitBytes = getImplicitArgNumBytes(MF); if (ImplicitBytes == 0) return ExplicitArgBytes; @@ -353,13 +355,3 @@ return 2; return 1; } - -unsigned SISubtarget::getMaxNumSGPRs() const { - if (hasSGPRInitBug()) - return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - - if (getGeneration() >= VOLCANIC_ISLANDS) - return 102; - - return 104; -} Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -794,14 +794,16 @@ // Currently there is none suitable machinery in the core llvm-mc for this. // MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); MCContext &Ctx = getContext(); - MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx)); + MCSymbol *Sym = + Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); + Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); - Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); } KernelScope.initialize(getContext()); } @@ -1859,9 +1861,10 @@ // If this directive has no arguments, then use the ISA version for the // targeted GPU. 
if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); - getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor, - Isa.Stepping, + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, + ISA.Stepping, "AMD", "AMDGPU"); return false; } @@ -2448,13 +2451,14 @@ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) Parser.Lex(); - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); if (CntName == "vmcnt") - IntVal = encodeVmcnt(IV, IntVal, CntVal); + IntVal = encodeVmcnt(ISA, IntVal, CntVal); else if (CntName == "expcnt") - IntVal = encodeExpcnt(IV, IntVal, CntVal); + IntVal = encodeExpcnt(ISA, IntVal, CntVal); else if (CntName == "lgkmcnt") - IntVal = encodeLgkmcnt(IV, IntVal, CntVal); + IntVal = encodeLgkmcnt(ISA, IntVal, CntVal); else return true; @@ -2463,8 +2467,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); - int64_t Waitcnt = getWaitcntBitMask(IV); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -135,6 +135,7 @@ const RegPressureTracker &RPTracker, SchedCandidate &Cand) { const SISubtarget &ST = DAG->MF.getSubtarget(); + const FeatureBitset &STF = ST.getFeatureBits(); const SIRegisterInfo *SRI = static_cast(TRI); ArrayRef Pressure = RPTracker.getRegSetPressureAtPos(); unsigned SGPRPressure = 
Pressure[SRI->getSGPRPressureSet()]; @@ -144,8 +145,10 @@ unsigned VGPRExcessLimit = Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); - unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true); - unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves); + unsigned SGPRCriticalLimit = + AMDGPU::IsaInfo::getMaxNumSGPRs(STF, MaxWaves, true); + unsigned VGPRCriticalLimit = + AMDGPU::IsaInfo::getMaxNumVGPRs(STF, MaxWaves); ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -1057,27 +1057,28 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - IsaVersion IV = getIsaVersion(STI.getFeatureBits()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits()); unsigned SImm16 = MI->getOperand(OpNo).getImm(); unsigned Vmcnt, Expcnt, Lgkmcnt; - decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt); + decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt); bool NeedSpace = false; - if (Vmcnt != getVmcntBitMask(IV)) { + if (Vmcnt != getVmcntBitMask(ISA)) { O << "vmcnt(" << Vmcnt << ')'; NeedSpace = true; } - if (Expcnt != getExpcntBitMask(IV)) { + if (Expcnt != getExpcntBitMask(ISA)) { if (NeedSpace) O << ' '; O << "expcnt(" << Expcnt << ')'; NeedSpace = true; } - if (Lgkmcnt != getLgkmcntBitMask(IV)) { + if (Lgkmcnt != getLgkmcntBitMask(ISA)) { if (NeedSpace) O << ' '; O << "lgkmcnt(" << Lgkmcnt << ')'; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h @@ -17,10 
+17,12 @@ #include namespace llvm { +class FeatureBitset; class Module; // Get runtime metadata as YAML string. -std::string getRuntimeMDYAMLString(Module &M); +std::string getRuntimeMDYAMLString(const FeatureBitset &Features, + const Module &M); } #endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp @@ -16,6 +16,7 @@ #include "AMDGPU.h" #include "AMDGPURuntimeMetadata.h" #include "MCTargetDesc/AMDGPURuntimeMD.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" @@ -92,9 +93,30 @@ static const bool flow = true; }; +template <> struct MappingTraits { + static void mapping(IO &YamlIO, IsaInfo::Metadata &I) { + YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize); + YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize); + YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU); + YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU); + YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize, + I.MaxFlatWorkGroupSize); + YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule); + YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs); + YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs, + I.AddressableNumSGPRs); + YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule); + YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs); + YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs, + I.AddressableNumVGPRs); + } + static const bool flow = true; +}; + template <> struct MappingTraits { static void mapping(IO &YamlIO, Program::Metadata &Prog) { YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); + YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo); YamlIO.mapOptional(KeyName::PrintfInfo, 
Prog.PrintfInfo); YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); } @@ -383,10 +405,27 @@ } } -std::string llvm::getRuntimeMDYAMLString(Module &M) { +std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, + const Module &M) { Program::Metadata Prog; Prog.MDVersionSeq.push_back(MDVersion); Prog.MDVersionSeq.push_back(MDRevision); + Prog.IsaInfo.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features); + Prog.IsaInfo.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features); + Prog.IsaInfo.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features); + Prog.IsaInfo.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features); + Prog.IsaInfo.MaxFlatWorkGroupSize = + AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features); + Prog.IsaInfo.SGPRAllocGranule = + AMDGPU::IsaInfo::getSGPRAllocGranule(Features); + Prog.IsaInfo.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features); + Prog.IsaInfo.AddressableNumSGPRs = + AMDGPU::IsaInfo::getAddressableNumSGPRs(Features); + Prog.IsaInfo.VGPRAllocGranule = + AMDGPU::IsaInfo::getVGPRAllocGranule(Features); + Prog.IsaInfo.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features); + Prog.IsaInfo.AddressableNumVGPRs = + AMDGPU::IsaInfo::getAddressableNumVGPRs(Features); // Set PrintfInfo. 
if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -17,6 +17,7 @@ #include "AMDGPUPTNote.h" class DataLayout; +class FeatureBitset; class Function; class MCELFStreamer; class MCSymbol; @@ -46,7 +47,8 @@ virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitRuntimeMetadata(Module &M) = 0; + virtual void EmitRuntimeMetadata(const FeatureBitset &Features, + const Module &M) = 0; virtual void EmitRuntimeMetadata(StringRef Metadata) = 0; }; @@ -70,7 +72,8 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(Module &M) override; + void EmitRuntimeMetadata(const FeatureBitset &Features, + const Module &M) override; void EmitRuntimeMetadata(StringRef Metadata) override; }; @@ -101,7 +104,8 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(Module &M) override; + void EmitRuntimeMetadata(const FeatureBitset &Features, + const Module &M) override; void EmitRuntimeMetadata(StringRef Metadata) override; }; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,9 +93,10 @@ OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; } -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) { +void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, + const Module &M) { OS << "\t.amdgpu_runtime_metadata\n"; - OS << getRuntimeMDYAMLString(M); + OS << getRuntimeMDYAMLString(Features, M); OS << "\n\t.end_amdgpu_runtime_metadata\n"; } @@ -236,6 +237,7 @@ ); } -void 
AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) { - EmitRuntimeMetadata(getRuntimeMDYAMLString(M)); +void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, + const Module &M) { + EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M)); } Index: lib/Target/AMDGPU/SIInsertWaits.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaits.cpp +++ lib/Target/AMDGPU/SIInsertWaits.cpp @@ -47,7 +47,6 @@ #define DEBUG_TYPE "si-insert-waits" using namespace llvm; -using namespace llvm::AMDGPU; namespace { @@ -76,7 +75,7 @@ const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI; - IsaVersion IV; + AMDGPU::IsaInfo::IsaVersion ISA; /// \brief Constant zero value static const Counters ZeroCounts; @@ -427,10 +426,10 @@ // Build the wait instruction BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(encodeWaitcnt(IV, - Counts.Named.VM, - Counts.Named.EXP, - Counts.Named.LGKM)); + .addImm(AMDGPU::encodeWaitcnt(ISA, + Counts.Named.VM, + Counts.Named.EXP, + Counts.Named.LGKM)); LastOpcodeType = OTHER; LastInstWritesM0 = false; @@ -458,9 +457,9 @@ unsigned Imm = I->getOperand(0).getImm(); Counters Counts, WaitOn; - Counts.Named.VM = decodeVmcnt(IV, Imm); - Counts.Named.EXP = decodeExpcnt(IV, Imm); - Counts.Named.LGKM = decodeLgkmcnt(IV, Imm); + Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm); + Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm); + Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm); for (unsigned i = 0; i < 3; ++i) { if (Counts.Array[i] <= LastIssued.Array[i]) @@ -534,12 +533,12 @@ TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); - IV = getIsaVersion(ST->getFeatureBits()); + ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); const SIMachineFunctionInfo *MFI = MF.getInfo(); - HardwareLimits.Named.VM = getVmcntBitMask(IV); - HardwareLimits.Named.EXP = getExpcntBitMask(IV); - 
HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV); + HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA); + HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA); + HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA); WaitedOn = ZeroCounts; DelayedWaitOn = ZeroCounts; Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -195,29 +195,8 @@ return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID); } - /// \returns SGPR allocation granularity supported by the subtarget. - unsigned getSGPRAllocGranule() const { - return 8; - } - - /// \returns Total number of SGPRs supported by the subtarget. - unsigned getTotalNumSGPRs(const SISubtarget &ST) const; - - /// \returns Number of addressable SGPRs supported by the subtarget. - unsigned getNumAddressableSGPRs(const SISubtarget &ST) const; - - /// \returns Number of reserved SGPRs supported by the subtarget. - unsigned getNumReservedSGPRs(const SISubtarget &ST, - const SIMachineFunctionInfo &MFI) const; - - /// \returns Minimum number of SGPRs that meets given number of waves per - /// execution unit requirement for given subtarget. - unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const; - - /// \returns Maximum number of SGPRs that meets given number of waves per - /// execution unit requirement for given subtarget. - unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU, - bool Addressable) const; + /// \returns Reserved number of SGPRs for given function \p MF. + unsigned getReservedNumSGPRs(const MachineFunction &MF) const; /// \returns Maximum number of SGPRs that meets number of waves per execution /// unit requirement for function \p MF, or number of SGPRs explicitly @@ -229,27 +208,8 @@ /// unit requirement. 
unsigned getMaxNumSGPRs(const MachineFunction &MF) const; - /// \returns VGPR allocation granularity supported by the subtarget. - unsigned getVGPRAllocGranule() const { - return 4; - } - - /// \returns Total number of VGPRs supported by the subtarget. - unsigned getTotalNumVGPRs() const { - return 256; - } - - /// \returns Number of reserved VGPRs for debugger use supported by the - /// subtarget. - unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const; - - /// \returns Minimum number of SGPRs that meets given number of waves per - /// execution unit requirement. - unsigned getMinNumVGPRs(unsigned WavesPerEU) const; - - /// \returns Maximum number of VGPRs that meets given number of waves per - /// execution unit requirement. - unsigned getMaxNumVGPRs(unsigned WavesPerEU) const; + /// \returns Reserved number of VGPRs for given function \p MF. + unsigned getReservedNumVGPRs(const MachineFunction &MF) const; /// \returns Maximum number of VGPRs that meets number of waves per execution /// unit requirement for function \p MF, or number of VGPRs explicitly Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1156,20 +1156,10 @@ return AMDGPU::NoRegister; } -unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 800; - return 512; -} - -unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 102; - return 104; -} +unsigned SIRegisterInfo::getReservedNumSGPRs(const MachineFunction &MF) const { + const SISubtarget &ST = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); -unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST, - const SIMachineFunctionInfo &MFI) const { if (MFI.hasFlatScratchInit()) { if 
(ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return 6; // FLAT_SCRATCH, XNACK, VCC (in that order) @@ -1184,66 +1174,22 @@ return 2; // VCC. } -unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST, - unsigned WavesPerEU) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - switch (WavesPerEU) { - case 0: return 0; - case 10: return 0; - case 9: return 0; - case 8: return 81; - default: return 97; - } - } else { - switch (WavesPerEU) { - case 0: return 0; - case 10: return 0; - case 9: return 49; - case 8: return 57; - case 7: return 65; - case 6: return 73; - case 5: return 81; - default: return 97; - } - } -} - -unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST, - unsigned WavesPerEU, - bool Addressable) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - switch (WavesPerEU) { - case 0: return 80; - case 10: return 80; - case 9: return 80; - case 8: return 96; - default: return Addressable ? getNumAddressableSGPRs(ST) : 112; - } - } else { - switch (WavesPerEU) { - case 0: return 48; - case 10: return 48; - case 9: return 56; - case 8: return 64; - case 7: return 72; - case 6: return 80; - case 5: return 96; - default: return getNumAddressableSGPRs(ST); - } - } -} - unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const { const Function &F = *MF.getFunction(); const SISubtarget &ST = MF.getSubtarget(); + const FeatureBitset &STF = ST.getFeatureBits(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); // Compute maximum number of SGPRs function can use using default/requested // minimum number of waves per execution unit. 
std::pair WavesPerEU = MFI.getWavesPerEU(); - unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false); - unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true); + unsigned MinNumSGPRs = + AMDGPU::IsaInfo::getMinNumSGPRs(STF, WavesPerEU.second); + unsigned MaxNumSGPRs = + AMDGPU::IsaInfo::getMaxNumSGPRs(STF, WavesPerEU.first, false); + unsigned MaxNumAddressableSGPRs = + AMDGPU::IsaInfo::getMaxNumSGPRs(STF, WavesPerEU.first, true); // Check if maximum number of SGPRs was explicitly requested using // "amdgpu-num-sgpr" attribute. @@ -1252,7 +1198,7 @@ F, "amdgpu-num-sgpr", MaxNumSGPRs); // Make sure requested value does not violate subtarget's specifications. - if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI))) + if (Requested && (Requested <= getReservedNumSGPRs(MF))) Requested = 0; // If more SGPRs are required to support the input user/system SGPRs, @@ -1268,10 +1214,9 @@ // Make sure requested value is compatible with values implied by // default/requested minimum/maximum number of waves per execution unit. 
- if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false)) + if (Requested && Requested > MaxNumSGPRs) Requested = 0; - if (WavesPerEU.second && - Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second)) + if (WavesPerEU.second && Requested && Requested < MinNumSGPRs) Requested = 0; if (Requested) @@ -1279,61 +1224,35 @@ } if (ST.hasSGPRInitBug()) - MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; - return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI), + return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF), MaxNumAddressableSGPRs); } -unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs( - const SISubtarget &ST) const { +unsigned SIRegisterInfo::getReservedNumVGPRs(const MachineFunction &MF) const { + const SISubtarget &ST = MF.getSubtarget(); + if (ST.debuggerReserveRegs()) return 4; - return 0; -} - -unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const { - switch (WavesPerEU) { - case 0: return 0; - case 10: return 0; - case 9: return 25; - case 8: return 29; - case 7: return 33; - case 6: return 37; - case 5: return 41; - case 4: return 49; - case 3: return 65; - case 2: return 85; - default: return 129; - } -} -unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const { - switch (WavesPerEU) { - case 0: return 24; - case 10: return 24; - case 9: return 28; - case 8: return 32; - case 7: return 36; - case 6: return 40; - case 5: return 48; - case 4: return 64; - case 3: return 84; - case 2: return 128; - default: return getTotalNumVGPRs(); - } + return 0; } unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const { const Function &F = *MF.getFunction(); const SISubtarget &ST = MF.getSubtarget(); + const FeatureBitset &STF = ST.getFeatureBits(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); // Compute maximum number of VGPRs function can use using default/requested // minimum number of waves per execution unit. 
std::pair WavesPerEU = MFI.getWavesPerEU(); - unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first); + unsigned MinNumVGPRs = + AMDGPU::IsaInfo::getMinNumVGPRs(STF, WavesPerEU.second); + unsigned MaxNumVGPRs = + AMDGPU::IsaInfo::getMaxNumVGPRs(STF, WavesPerEU.first); // Check if maximum number of VGPRs was explicitly requested using // "amdgpu-num-vgpr" attribute. @@ -1342,22 +1261,21 @@ F, "amdgpu-num-vgpr", MaxNumVGPRs); // Make sure requested value does not violate subtarget's specifications. - if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST)) + if (Requested && Requested <= getReservedNumVGPRs(MF)) Requested = 0; // Make sure requested value is compatible with values implied by // default/requested minimum/maximum number of waves per execution unit. - if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first)) + if (Requested && Requested > MaxNumVGPRs) Requested = 0; - if (WavesPerEU.second && - Requested && Requested < getMinNumVGPRs(WavesPerEU.second)) + if (WavesPerEU.second && Requested && Requested < MinNumVGPRs) Requested = 0; if (Requested) MaxNumVGPRs = Requested; } - return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST); + return MaxNumVGPRs - getReservedNumVGPRs(MF); } ArrayRef SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -33,16 +33,112 @@ namespace AMDGPU { -LLVM_READONLY -int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +namespace IsaInfo { +enum { + // The closed Vulkan driver sets 96, which limits the wave count to 8 but + // doesn't spill SGPRs as much as when 80 is set. + FIXED_NUM_SGPRS_FOR_INIT_BUG = 96 +}; + +/// \brief Instruction set architecture version. 
struct IsaVersion { unsigned Major; unsigned Minor; unsigned Stepping; }; +/// \returns Isa version for given subtarget \p Features. IsaVersion getIsaVersion(const FeatureBitset &Features); + +/// \returns Wavefront size for given subtarget \p Features. +unsigned getWavefrontSize(const FeatureBitset &Features); + +/// \returns Local memory size in bytes for given subtarget \p Features. +unsigned getLocalMemorySize(const FeatureBitset &Features); + +/// \returns Number of execution units per compute unit for given subtarget \p +/// Features. +unsigned getEUsPerCU(const FeatureBitset &Features); + +/// \returns Maximum number of work groups per compute unit for given subtarget +/// \p Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns Maximum number of waves per compute unit for given subtarget \p +/// Features without any kind of limitation. +unsigned getMaxWavesPerCU(const FeatureBitset &Features); + +/// \returns Maximum number of waves per compute unit for given subtarget \p +/// Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns Minimum number of waves per execution unit for given subtarget \p +/// Features. +unsigned getMinWavesPerEU(const FeatureBitset &Features); + +/// \returns Maximum number of waves per execution unit for given subtarget \p +/// Features without any kind of limitation. +unsigned getMaxWavesPerEU(const FeatureBitset &Features); + +/// \returns Maximum number of waves per execution unit for given subtarget \p +/// Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerEU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns Minimum flat work group size for given subtarget \p Features. 
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features); + +/// \returns Maximum flat work group size for given subtarget \p Features. +unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features); + +/// \returns Number of waves per work group for given subtarget \p Features and +/// limited by given \p FlatWorkGroupSize. +unsigned getWavesPerWorkGroup(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns SGPR allocation granularity for given subtarget \p Features. +unsigned getSGPRAllocGranule(const FeatureBitset &Features); + +/// \returns Total number of SGPRs for given subtarget \p Features. +unsigned getTotalNumSGPRs(const FeatureBitset &Features); + +/// \returns Addressable number of SGPRs for given subtarget \p Features. +unsigned getAddressableNumSGPRs(const FeatureBitset &Features); + +/// \returns Minimum number of SGPRs that meets the given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU); + +/// \returns Maximum number of SGPRs that meets the given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, + bool Addressable); + +/// \returns VGPR allocation granularity for given subtarget \p Features. +unsigned getVGPRAllocGranule(const FeatureBitset &Features); + +/// \returns Total number of VGPRs for given subtarget \p Features. +unsigned getTotalNumVGPRs(const FeatureBitset &Features); + +/// \returns Addressable number of VGPRs for given subtarget \p Features. +unsigned getAddressableNumVGPRs(const FeatureBitset &Features); + +/// \returns Minimum number of VGPRs that meets given number of waves per +/// execution unit requirement for given subtarget \p Features. 
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); + +/// \returns Maximum number of VGPRs that meets given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); + +} // namespace IsaInfo + +LLVM_READONLY +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); MCSection *getHSATextSection(MCContext &Ctx); @@ -83,26 +179,26 @@ std::pair Default, bool OnlyFirstRequired = false); -/// \returns Waitcnt bit mask for given isa \p Version. -unsigned getWaitcntBitMask(IsaVersion Version); - /// \returns Vmcnt bit mask for given isa \p Version. -unsigned getVmcntBitMask(IsaVersion Version); +unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Expcnt bit mask for given isa \p Version. -unsigned getExpcntBitMask(IsaVersion Version); +unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Lgkmcnt bit mask for given isa \p Version. -unsigned getLgkmcntBitMask(IsaVersion Version); +unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version); + +/// \returns Waitcnt bit mask for given isa \p Version. +unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt); +unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt); +unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. 
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt); +unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and @@ -112,17 +208,20 @@ /// \p Vmcnt = \p Waitcnt[3:0] /// \p Expcnt = \p Waitcnt[6:4] /// \p Lgkmcnt = \p Waitcnt[11:8] -void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. -unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt); +unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Vmcnt); /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt); +unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Expcnt); /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt); +unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Lgkmcnt); /// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa /// \p Version. @@ -134,7 +233,7 @@ /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. 
-unsigned encodeWaitcnt(IsaVersion Version, +unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -74,47 +74,228 @@ /// \returns Lgkmcnt bit width. unsigned getLgkmcntBitWidth() { return 4; } -} // anonymous namespace +} // namespace anonymous namespace llvm { namespace AMDGPU { -IsaVersion getIsaVersion(const FeatureBitset &Features) { +namespace IsaInfo { +IsaVersion getIsaVersion(const FeatureBitset &Features) { + // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; - if (Features.test(FeatureISAVersion7_0_1)) return {7, 0, 1}; - if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + // VI. if (Features.test(FeatureISAVersion8_0_0)) return {8, 0, 0}; - if (Features.test(FeatureISAVersion8_0_1)) return {8, 0, 1}; - if (Features.test(FeatureISAVersion8_0_2)) return {8, 0, 2}; - if (Features.test(FeatureISAVersion8_0_3)) return {8, 0, 3}; - if (Features.test(FeatureISAVersion8_0_4)) return {8, 0, 4}; - if (Features.test(FeatureISAVersion8_1_0)) return {8, 1, 0}; - return {0, 0, 0}; + if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) + return {0, 0, 0}; + return {7, 0, 0}; +} + +unsigned getWavefrontSize(const FeatureBitset &Features) { + if (Features.test(FeatureWavefrontSize16)) + return 16; + if (Features.test(FeatureWavefrontSize32)) + return 32; + + return 64; +} + +unsigned getLocalMemorySize(const FeatureBitset &Features) { + if (Features.test(FeatureLocalMemorySize32768)) + return 32768; + if (Features.test(FeatureLocalMemorySize65536)) + return 65536; + + return 0; +} + +unsigned getEUsPerCU(const FeatureBitset &Features) { + return 4; +} + +unsigned getMaxWorkGroupsPerCU(const FeatureBitset 
&Features, + unsigned FlatWorkGroupSize) { + IsaVersion Version = getIsaVersion(Features); + if (Version.Major < 7) + return 8; + return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16; +} + +unsigned getMaxWavesPerCU(const FeatureBitset &Features) { + return getMaxWavesPerEU(Features) * getEUsPerCU(Features); +} + +unsigned getMaxWavesPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + return getWavesPerWorkGroup(Features, FlatWorkGroupSize); +} + +unsigned getMinWavesPerEU(const FeatureBitset &Features) { + return 1; +} + +unsigned getMaxWavesPerEU(const FeatureBitset &Features) { + // FIXME: Need to take scratch memory into account. + return 10; +} + +unsigned getMaxWavesPerEU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), + getEUsPerCU(Features)) / getEUsPerCU(Features); +} + +unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { + return 1; +} + +unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { + return 2048; +} + +unsigned getWavesPerWorkGroup(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / + getWavefrontSize(Features); +} + +unsigned getSGPRAllocGranule(const FeatureBitset &Features) { + return 8; +} + +unsigned getTotalNumSGPRs(const FeatureBitset &Features) { + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) + return 800; + return 512; } +unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { + if (Features.test(FeatureSGPRInitBug)) + return FIXED_NUM_SGPRS_FOR_INIT_BUG; + + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) + return 102; + return 104; +} + +unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) { + switch (WavesPerEU) { + case 0: return 0; + case 10: return 0; + case 
9: return 0; + case 8: return 81; + default: return 97; + } + } else { + switch (WavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 49; + case 8: return 57; + case 7: return 65; + case 6: return 73; + case 5: return 81; + default: return 97; + } + } +} + +unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, + bool Addressable) { + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) { + switch (WavesPerEU) { + case 0: return 80; + case 10: return 80; + case 9: return 80; + case 8: return 96; + default: return Addressable ? getAddressableNumSGPRs(Features) : 112; + } + } else { + switch (WavesPerEU) { + case 0: return 48; + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return getAddressableNumSGPRs(Features); + } + } +} + +unsigned getVGPRAllocGranule(const FeatureBitset &Features) { + return 4; +} + +unsigned getTotalNumVGPRs(const FeatureBitset &Features) { + return 256; +} + +unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { + return 256; +} + +unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { + switch (WavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 25; + case 8: return 29; + case 7: return 33; + case 6: return 37; + case 5: return 41; + case 4: return 49; + case 3: return 65; + case 2: return 85; + default: return 129; + } +} + +unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { + switch (WavesPerEU) { + case 0: return 24; + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return getTotalNumVGPRs(Features); + } +} + +} // namespace IsaInfo + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features) { - - IsaVersion ISA = 
getIsaVersion(Features); + IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); memset(&Header, 0, sizeof(Header)); @@ -222,57 +403,60 @@ return Ints; } -unsigned getWaitcntBitMask(IsaVersion Version) { - unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth()); - unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); - unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); - return Vmcnt | Expcnt | Lgkmcnt; -} - -unsigned getVmcntBitMask(IsaVersion Version) { +unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getVmcntBitWidth()) - 1; } -unsigned getExpcntBitMask(IsaVersion Version) { +unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getExpcntBitWidth()) - 1; } -unsigned getLgkmcntBitMask(IsaVersion Version) { +unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getLgkmcntBitWidth()) - 1; } -unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) { +unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { + unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth()); + unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); + unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); + return Vmcnt | Expcnt | Lgkmcnt; +} + +unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); } -unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) { +unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) { +unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, +void decodeWaitcnt(const 
IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { Vmcnt = decodeVmcnt(Version, Waitcnt); Expcnt = decodeExpcnt(Version, Waitcnt); Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) { +unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Vmcnt) { return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); } -unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) { +unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Expcnt) { return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) { +unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Lgkmcnt) { return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned encodeWaitcnt(IsaVersion Version, +unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: { amd.MDVersion: [ 2, 0 ] } +; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 
512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } } !opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: { amd.MDVersion: [ 2, 0 ] } +; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } } !opencl.ocl.version = !{!0} !0 = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: { amd.MDVersion: [ 2, 0 ] } +; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } } 
!opencl.ocl.version = !{!0} !0 = !{i32 1} Index: test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/runtime-metadata.ll +++ test/CodeGen/AMDGPU/runtime-metadata.ll @@ -11,7 +11,7 @@ %opencl.clk_event_t = type opaque ; CHECK: --- -; CHECK-NEXT: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: +; CHECK-NEXT: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: ; CHECK-NEXT: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: ; CHECK-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }