Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -789,12 +789,12 @@ : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) { MCAsmParserExtension::Initialize(Parser); - if (getSTI().getFeatureBits().none()) { + if (getFeatureBits().none()) { // Set default features. copySTI().ToggleFeature("SOUTHERN_ISLANDS"); } - setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); { // TODO: make those pre-defined variables read-only. @@ -802,7 +802,7 @@ // MCSymbol::isRedefinable is intended for another purpose, and // AsmParser::parseDirectiveSet() cannot be specialized for specific target. AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); MCContext &Ctx = getContext(); MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); @@ -828,7 +828,7 @@ } bool hasInv2PiInlineImm() const { - return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; + return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } bool hasSGPR102_SGPR103() const { @@ -850,6 +850,10 @@ return &MII; } + const FeatureBitset &getFeatureBits() const { + return getSTI().getFeatureBits(); + } + void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } @@ -1870,7 +1874,7 @@ // targeted GPU. 
if (getLexer().is(AsmToken::EndOfStatement)) { AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); @@ -1947,7 +1951,8 @@ MS.flush(); - getTargetStreamer().EmitRuntimeMetadata(Metadata); + if (getTargetStreamer().EmitRuntimeMetadata(getFeatureBits(), Metadata)) + return Error(getParser().getTok().getLoc(), "invalid runtime metadata"); return false; } @@ -1965,7 +1970,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits()); while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment @@ -2459,7 +2464,7 @@ Parser.Lex(); AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); if (CntName == "vmcnt") IntVal = encodeVmcnt(ISA, IntVal, CntVal); else if (CntName == "expcnt") @@ -2475,7 +2480,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits()); + AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); int64_t Waitcnt = getWaitcntBitMask(ISA); SMLoc S = Parser.getTok().getLoc(); Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h @@ -14,15 +14,20 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H +#include "llvm/Support/ErrorOr.h" #include <string> namespace llvm { class FeatureBitset; class Module; -// Get
runtime metadata as YAML string. +/// \returns Runtime metadata as YAML string. std::string getRuntimeMDYAMLString(const FeatureBitset &Features, const Module &M); +/// \returns \p YAML if \p YAML is valid runtime metadata, error otherwise. +ErrorOr<std::string> getRuntimeMDYAMLString(const FeatureBitset &Features, + StringRef YAML); + } #endif Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp @@ -38,6 +38,7 @@ #include using namespace llvm; +using namespace llvm::AMDGPU::IsaInfo; using namespace ::AMDGPU::RuntimeMD; static cl::opt @@ -88,7 +89,7 @@ INVALID_KERNEL_INDEX); YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups, uint8_t(0)); - YamlIO.mapRequired(KeyName::Args, K.Args); + YamlIO.mapOptional(KeyName::Args, K.Args); } static const bool flow = true; }; @@ -116,7 +117,7 @@ template <> struct MappingTraits<Program::Metadata> { static void mapping(IO &YamlIO, Program::Metadata &Prog) { YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); - YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo); + YamlIO.mapOptional(KeyName::IsaInfo, Prog.IsaInfo); YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); } @@ -375,6 +376,20 @@ return Kernel; } +static void getIsaInfo(const FeatureBitset &Features, IsaInfo::Metadata &IIM) { + IIM.WavefrontSize = getWavefrontSize(Features); + IIM.LocalMemorySize = getLocalMemorySize(Features); + IIM.EUsPerCU = getEUsPerCU(Features); + IIM.MaxWavesPerEU = getMaxWavesPerEU(Features); + IIM.MaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features); + IIM.SGPRAllocGranule = getSGPRAllocGranule(Features); + IIM.TotalNumSGPRs = getTotalNumSGPRs(Features); + IIM.AddressableNumSGPRs = getAddressableNumSGPRs(Features); + IIM.VGPRAllocGranule = getVGPRAllocGranule(Features); +
IIM.TotalNumVGPRs = getTotalNumVGPRs(Features); + IIM.AddressableNumVGPRs = getAddressableNumVGPRs(Features); +} + Program::Metadata::Metadata(const std::string &YAML) { yaml::Input Input(YAML); Input >> *this; @@ -411,18 +426,7 @@ Prog.MDVersionSeq.push_back(MDVersion); Prog.MDVersionSeq.push_back(MDRevision); - IsaInfo::Metadata &IIM = Prog.IsaInfo; - IIM.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features); - IIM.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features); - IIM.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features); - IIM.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features); - IIM.MaxFlatWorkGroupSize = AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features); - IIM.SGPRAllocGranule = AMDGPU::IsaInfo::getSGPRAllocGranule(Features); - IIM.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features); - IIM.AddressableNumSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(Features); - IIM.VGPRAllocGranule = AMDGPU::IsaInfo::getVGPRAllocGranule(Features); - IIM.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features); - IIM.AddressableNumVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(Features); + getIsaInfo(Features, Prog.IsaInfo); // Set PrintfInfo. 
if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { @@ -451,3 +455,16 @@ return YAML; } + +ErrorOr<std::string> llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, + StringRef YAML) { + Program::Metadata Prog; + yaml::Input Input(YAML); + Input >> Prog; + + getIsaInfo(Features, Prog.IsaInfo); + + if (Input.error()) + return Input.error(); + return Prog.toYAML(); +} Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -50,7 +50,9 @@ virtual void EmitRuntimeMetadata(const FeatureBitset &Features, const Module &M) = 0; - virtual void EmitRuntimeMetadata(StringRef Metadata) = 0; + /// \returns False on success, true on failure. + virtual bool EmitRuntimeMetadata(const FeatureBitset &Features, + StringRef Metadata) = 0; }; class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { @@ -75,7 +77,9 @@ void EmitRuntimeMetadata(const FeatureBitset &Features, const Module &M) override; - void EmitRuntimeMetadata(StringRef Metadata) override; + /// \returns False on success, true on failure. + bool EmitRuntimeMetadata(const FeatureBitset &Features, + StringRef Metadata) override; }; class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { @@ -107,7 +111,9 @@ void EmitRuntimeMetadata(const FeatureBitset &Features, const Module &M) override; - void EmitRuntimeMetadata(StringRef Metadata) override; + /// \returns False on success, true on failure.
+ bool EmitRuntimeMetadata(const FeatureBitset &Features, + StringRef Metadata) override; }; } Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -100,10 +100,17 @@ OS << "\n\t.end_amdgpu_runtime_metadata\n"; } -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) { +bool AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, + StringRef Metadata) { + auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata); + if (!VerifiedMetadata) + return true; + OS << "\t.amdgpu_runtime_metadata"; - OS << Metadata; + OS << VerifiedMetadata.get(); OS << "\t.end_amdgpu_runtime_metadata\n"; + + return false; } //===----------------------------------------------------------------------===// @@ -216,7 +223,12 @@ Symbol->setBinding(ELF::STB_GLOBAL); } -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) { +bool AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, + StringRef Metadata) { + auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata); + if (!VerifiedMetadata) + return true; + // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. 
auto &Context = getContext(); @@ -231,13 +243,15 @@ PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, [&](MCELFStreamer &OS) { OS.EmitLabel(DescBegin); - OS.EmitBytes(Metadata); + OS.EmitBytes(VerifiedMetadata.get()); OS.EmitLabel(DescEnd); } ); + + return false; } void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, const Module &M) { - EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M)); + EmitRuntimeMetadata(Features, getRuntimeMDYAMLString(Features, M)); } Index: llvm/trunk/test/MC/AMDGPU/hsa.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa.s +++ llvm/trunk/test/MC/AMDGPU/hsa.s @@ -48,13 +48,13 @@ .end_amdgpu_runtime_metadata // ASM: .amdgpu_runtime_metadata -// ASM: { -// ASM: amd.MDVersion: [ 2, 0 ] -// ASM: amd.Kernels: [ -// ASM: { amd.KernelName: amd_kernel_code_t_test_all }, -// ASM: { amd.KernelName: amd_kernel_code_t_minimal } -// ASM: ] -// ASM: } +// ASM: { +// ASM: amd.MDVersion: [ 2, 0 ] +// ASM: amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, +// ASM: amd.Kernels: +// ASM: - { amd.KernelName: amd_kernel_code_t_test_all } +// ASM: - { amd.KernelName: amd_kernel_code_t_minimal } +// ASM: } // ASM: .end_amdgpu_runtime_metadata .amdgpu_hsa_kernel amd_kernel_code_t_test_all Index: llvm/trunk/test/MC/AMDGPU/metadata.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/metadata.s +++ llvm/trunk/test/MC/AMDGPU/metadata.s @@ -1,35 +0,0 @@ -// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM - -.amdgpu_runtime_metadata - { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: 
[ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: - - - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: - - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } - - - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: - - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } - - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } - } -.end_amdgpu_runtime_metadata - -// ASM: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: -// ASM: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -// ASM: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -// ASM: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: -// ASM: - { 
amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } -// ASM: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } -// ASM: } Index: llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s +++ llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700 +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800 +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900 + +.amdgpu_runtime_metadata + { amd.MDVersion: [ 2, 1 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: + - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } + - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: + - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, 
amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } + } +.end_amdgpu_runtime_metadata + +// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + +// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + +// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + +// GCN: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: +// GCN-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, 
amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } +// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: +// GCN-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } } Index: llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s +++ llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700 +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800 +// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900 + +.amdgpu_runtime_metadata + { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, 
amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: + - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } + - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: + - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } + } +.end_amdgpu_runtime_metadata + +// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + +// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, 
amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + +// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: + +// GCN: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: +// GCN-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } +// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: +// GCN-NEXT: - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } +// GCN-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, 
amd.ArgAddrQual: 1 } } } Index: llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s +++ llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s @@ -0,0 +1,106 @@ +; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s +; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s +; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s +; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s +; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s +; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s + +; CHECK: error: unknown key 'amd.RandomUnknownKey' + + .text + .hsa_code_object_version 2,1 + .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" + .amdgpu_runtime_metadata +--- +{ amd.MDVersion: [ 2, 1 ], amd.RandomUnknownKey, amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.Kernels: + - { amd.KernelName: test, amd.Language: OpenCL C, amd.LanguageVersion: [ 1, 0 ], amd.Args: + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int*', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 } + - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } } } +... 
+ + .end_amdgpu_runtime_metadata + .globl test + .p2align 8 + .type test,@function + .amdgpu_hsa_kernel test +test: ; @test + .amd_kernel_code_t + amd_code_version_major = 1 + amd_code_version_minor = 0 + amd_machine_kind = 1 + amd_machine_version_major = 8 + amd_machine_version_minor = 0 + amd_machine_version_stepping = 3 + kernel_code_entry_byte_offset = 256 + kernel_code_prefetch_byte_size = 0 + max_scratch_backing_memory_byte_size = 0 + granulated_workitem_vgpr_count = 0 + granulated_wavefront_sgpr_count = 0 + priority = 0 + float_mode = 192 + priv = 0 + enable_dx10_clamp = 1 + debug_mode = 0 + enable_ieee_mode = 1 + enable_sgpr_private_segment_wave_byte_offset = 0 + user_sgpr_count = 6 + enable_trap_handler = 1 + enable_sgpr_workgroup_id_x = 1 + enable_sgpr_workgroup_id_y = 0 + enable_sgpr_workgroup_id_z = 0 + enable_sgpr_workgroup_info = 0 + enable_vgpr_workitem_id = 0 + enable_exception_msb = 0 + granulated_lds_size = 0 + enable_exception = 0 + enable_sgpr_private_segment_buffer = 1 + enable_sgpr_dispatch_ptr = 0 + enable_sgpr_queue_ptr = 0 + enable_sgpr_kernarg_segment_ptr = 1 + enable_sgpr_dispatch_id = 0 + enable_sgpr_flat_scratch_init = 0 + enable_sgpr_private_segment_size = 0 + enable_sgpr_grid_workgroup_count_x = 0 + enable_sgpr_grid_workgroup_count_y = 0 + enable_sgpr_grid_workgroup_count_z = 0 + enable_ordered_append_gds = 0 + private_element_size = 1 + is_ptr64 = 1 + is_dynamic_callstack = 0 + is_debug_enabled = 0 + is_xnack_enabled = 0 + workitem_private_segment_byte_size = 0 + workgroup_group_segment_byte_size = 0 + gds_segment_byte_size = 0 + kernarg_segment_byte_size = 8 + workgroup_fbarrier_count = 0 + wavefront_sgpr_count = 6 + workitem_vgpr_count = 3 + reserved_vgpr_first = 0 + reserved_vgpr_count = 0 + reserved_sgpr_first = 0 + reserved_sgpr_count = 0 + debug_wavefront_private_segment_offset_sgpr = 0 + debug_private_segment_buffer_sgpr = 0 + kernarg_segment_alignment = 4 + group_segment_alignment = 4 + private_segment_alignment = 4 + 
wavefront_size = 6 + call_convention = -1 + runtime_loader_kernel_symbol = 0 + .end_amd_kernel_code_t +; BB#0: ; %entry + s_load_dwordx2 s[0:1], s[4:5], 0x0 + v_mov_b32_e32 v2, 0x309 + s_waitcnt lgkmcnt(0) + v_mov_b32_e32 v0, s0 + v_mov_b32_e32 v1, s1 + flat_store_dword v[0:1], v2 + s_endpgm +.Lfunc_end0: + .size test, .Lfunc_end0-test + + .ident "" + .section ".note.GNU-stack"