Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -119,7 +119,7 @@ "AMD", "AMDGPU"); // Emit runtime metadata. - TS->EmitRuntimeMetadata(M); + TS->EmitRuntimeMetadata(STI->getFeatureBits(), M); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( Index: lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -42,36 +42,40 @@ // Version and revision of runtime metadata const unsigned char MDVersion = 2; - const unsigned char MDRevision = 0; + const unsigned char MDRevision = 1; // Name of keys for runtime metadata. namespace KeyName { - const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version - const char Language[] = "amd.Language"; // Language - const char LanguageVersion[] = "amd.LanguageVersion"; // Language version - const char Kernels[] = "amd.Kernels"; // Kernels - const char KernelName[] = "amd.KernelName"; // Kernel name - const char Args[] = "amd.Args"; // Kernel arguments - const char ArgSize[] = "amd.ArgSize"; // Kernel arg size - const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment - const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel type name - const char ArgName[] = "amd.ArgName"; // Kernel name - const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind - const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type - const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier - const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier - const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified - const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified - const char ArgIsVolatile[] = 
"amd.ArgIsVolatile"; // Kernel argument is volatile qualified - const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified - const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size - const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint - const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint - const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue - const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups - const char PrintfInfo[] = "amd.PrintfInfo"; // Prinf function call information - const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier - const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type + const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version + const char TargetInfo[] = "amd.TargetInfo"; // Target information + const char TargetInfoMaxNumSGPRs[] = "amd.MaxNumSGPRs"; // Target's maximum number of SGPRs + const char TargetInfoMaxNumVGPRs[] = "amd.MaxNumVGPRs"; // Target's maximum number of VGPRs + const char TargetInfoMaxLocalMemorySize[] = "amd.MaxLocalMemorySize"; // Target's maximum local memory size in bytes + const char Language[] = "amd.Language"; // Language + const char LanguageVersion[] = "amd.LanguageVersion"; // Language version + const char Kernels[] = "amd.Kernels"; // Kernels + const char KernelName[] = "amd.KernelName"; // Kernel name + const char Args[] = "amd.Args"; // Kernel arguments + const char ArgSize[] = "amd.ArgSize"; // Kernel arg size + const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment + const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel arg type name + const char ArgName[] = "amd.ArgName"; // Kernel arg name + const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind + const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type + const char ArgAddrQual[] = 
"amd.ArgAddrQual"; // Kernel argument address qualifier + const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier + const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified + const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified + const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified + const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified + const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size + const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint + const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint + const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue + const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups + const char PrintfInfo[] = "amd.PrintfInfo"; // Printf function call information + const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier + const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type } namespace KernelArg { @@ -168,10 +172,21 @@ }; } + namespace TargetInfo { + // In-memory representation of target information. + struct Metadata { + uint32_t MaxNumSGPRs; + uint32_t MaxNumVGPRs; + uint32_t MaxLocalMemorySize; + Metadata() : MaxNumSGPRs(0), MaxNumVGPRs(0), MaxLocalMemorySize(0) {} + }; + } + namespace Program { // In-memory representation of program information. struct Metadata { std::vector MDVersionSeq; + TargetInfo::Metadata TargetInfo; std::vector PrintfInfo; std::vector Kernels; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h @@ -20,7 +20,7 @@ class Module; // Get runtime metadata as YAML string. 
-std::string getRuntimeMDYAMLString(Module &M); +std::string getRuntimeMDYAMLString(const FeatureBitset &Features, Module &M); } #endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp @@ -16,6 +16,7 @@ #include "AMDGPU.h" #include "AMDGPURuntimeMetadata.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" @@ -83,9 +84,20 @@ static const bool flow = true; }; +template <> struct MappingTraits<TargetInfo::Metadata> { + static void mapping(IO &YamlIO, TargetInfo::Metadata &TI) { + YamlIO.mapRequired(KeyName::TargetInfoMaxNumSGPRs, TI.MaxNumSGPRs); + YamlIO.mapRequired(KeyName::TargetInfoMaxNumVGPRs, TI.MaxNumVGPRs); + YamlIO.mapRequired(KeyName::TargetInfoMaxLocalMemorySize, + TI.MaxLocalMemorySize); + } + static const bool flow = true; +}; + template <> struct MappingTraits<Program::Metadata> { static void mapping(IO &YamlIO, Program::Metadata &Prog) { YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); + YamlIO.mapRequired(KeyName::TargetInfo, Prog.TargetInfo); YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); } @@ -374,10 +386,14 @@ } } -std::string llvm::getRuntimeMDYAMLString(Module &M) { +std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, + Module &M) { Program::Metadata Prog; Prog.MDVersionSeq.push_back(MDVersion); Prog.MDVersionSeq.push_back(MDRevision); + Prog.TargetInfo.MaxNumSGPRs = AMDGPU::getNumAddressableSGPRs(Features); + Prog.TargetInfo.MaxNumVGPRs = AMDGPU::getTotalNumVGPRs(); + Prog.TargetInfo.MaxLocalMemorySize = AMDGPU::getLocalMemorySize(Features); // Set PrintfInfo. 
if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -46,7 +46,8 @@ virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitRuntimeMetadata(Module &M) = 0; + virtual void EmitRuntimeMetadata(const FeatureBitset &Features, + Module &M) = 0; virtual void EmitRuntimeMetadata(StringRef Metadata) = 0; }; @@ -70,7 +71,7 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(Module &M) override; + void EmitRuntimeMetadata(const FeatureBitset &Features, Module &M) override; void EmitRuntimeMetadata(StringRef Metadata) override; }; @@ -101,7 +102,7 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(Module &M) override; + void EmitRuntimeMetadata(const FeatureBitset &Features, Module &M) override; void EmitRuntimeMetadata(StringRef Metadata) override; }; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -93,9 +93,10 @@ OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; } -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) { +void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, + Module &M) { OS << "\t.amdgpu_runtime_metadata\n"; - OS << getRuntimeMDYAMLString(M); + OS << getRuntimeMDYAMLString(Features, M); OS << "\n\t.end_amdgpu_runtime_metadata\n"; } @@ -236,6 +237,7 @@ ); } -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) { - EmitRuntimeMetadata(getRuntimeMDYAMLString(M)); +void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const 
FeatureBitset &Features, + Module &M) { + EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M)); } Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -17,6 +17,7 @@ #include "AMDGPURegisterInfo.h" #include "SIDefines.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -236,7 +237,7 @@ /// \returns Total number of VGPRs supported by the subtarget. unsigned getTotalNumVGPRs() const { - return 256; + return AMDGPU::getTotalNumVGPRs(); } /// \returns Number of reserved VGPRs for debugger use supported by the Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1155,15 +1155,11 @@ } unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 800; - return 512; + return AMDGPU::getTotalNumSGPRs(ST.getFeatureBits()); } unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 102; - return 104; + return AMDGPU::getNumAddressableSGPRs(ST.getFeatureBits()); } unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST, Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -36,6 +36,18 @@ LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +/// \returns Total number of SGPRs supported by the subtarget. +unsigned getTotalNumSGPRs(const FeatureBitset &Features); + +/// \returns Number of addressable SGPRs supported by the subtarget. 
+unsigned getNumAddressableSGPRs(const FeatureBitset &Features); + +/// \returns Total number of VGPRs supported by the subtarget. +unsigned getTotalNumVGPRs(); + +/// \returns Size of the local memory in bytes supported by the subtarget. +unsigned getLocalMemorySize(const FeatureBitset &Features); + struct IsaVersion { unsigned Major; unsigned Minor; @@ -43,8 +55,10 @@ }; IsaVersion getIsaVersion(const FeatureBitset &Features); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); + MCSection *getHSATextSection(MCContext &Ctx); MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -79,6 +79,45 @@ namespace llvm { namespace AMDGPU { +unsigned getTotalNumSGPRs(const FeatureBitset &Features) { + + IsaVersion IV = getIsaVersion(Features); + if (IV.Major >= 8) + return 800; + + return 512; +} + +unsigned getNumAddressableSGPRs(const FeatureBitset &Features) { + + if (Features.test(FeatureSGPRInitBug)) + return 96; // SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG. 
+ + IsaVersion IV = getIsaVersion(Features); + if (IV.Major >= 8) + return 102; + + return 104; +} + +unsigned getTotalNumVGPRs() { + return 256; +} + +unsigned getLocalMemorySize(const FeatureBitset &Features) { + + if (Features.test(FeatureLocalMemorySize0)) + return 0; + + if (Features.test(FeatureLocalMemorySize32768)) + return 32768; + + if (Features.test(FeatureLocalMemorySize65536)) + return 65536; + + return 0; // No local memory size feature set: report zero rather than hit unreachable (UB in release builds). +} + IsaVersion getIsaVersion(const FeatureBitset &Features) { if (Features.test(FeatureISAVersion7_0_0)) Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: { amd.MDVersion: [ 2, 0 ] } +; CHECK: { amd.MDVersion: [ 2, 1 ], amd.TargetInfo: { amd.MaxNumSGPRs: 104, amd.MaxNumVGPRs: 256, amd.MaxLocalMemorySize: 65536 } } !opencl.ocl.version = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll =================================================================== --- test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: { amd.MDVersion: [ 2, 0 ] } +; CHECK: { amd.MDVersion: [ 2, 1 ], amd.TargetInfo: { amd.MaxNumSGPRs: 104, amd.MaxNumVGPRs: 256, amd.MaxLocalMemorySize: 65536 } } !opencl.ocl.version = !{!0} !0 = !{} Index: test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll =================================================================== --- 
test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll +++ test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s ; check llc does not crash for invalid opencl version metadata -; CHECK: { amd.MDVersion: [ 2, 0 ] } +; CHECK: { amd.MDVersion: [ 2, 1 ], amd.TargetInfo: { amd.MaxNumSGPRs: 104, amd.MaxNumVGPRs: 256, amd.MaxLocalMemorySize: 65536 } } !opencl.ocl.version = !{!0} !0 = !{i32 1} Index: test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/runtime-metadata.ll +++ test/CodeGen/AMDGPU/runtime-metadata.ll @@ -11,7 +11,7 @@ %opencl.clk_event_t = type opaque ; CHECK: --- -; CHECK-NEXT: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: +; CHECK-NEXT: { amd.MDVersion: [ 2, 1 ], amd.TargetInfo: { amd.MaxNumSGPRs: {{102|104}}, amd.MaxNumVGPRs: 256, amd.MaxLocalMemorySize: 65536 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: ; CHECK-NEXT: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: ; CHECK-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }