Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -26,6 +26,7 @@ namespace llvm { +class AMDGPUTargetStreamer; class MCOperand; class AMDGPUAsmPrinter final : public AsmPrinter { @@ -103,10 +104,12 @@ explicit AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); - bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override; + AMDGPUTargetStreamer& getTargetStreamer() const; + + bool runOnMachineFunction(MachineFunction &MF) override; + /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated /// pseudo lowering. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; @@ -132,6 +135,8 @@ void EmitStartOfAsmFile(Module &M) override; + void EmitEndOfAsmFile(Module &M) override; + bool isBlockOnlyReachableByFallthrough( const MachineBasicBlock *MBB) const override; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -99,23 +99,28 @@ return "AMDGPU Assembly Printer"; } +AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const { + return static_cast(*OutStreamer->getTargetStreamer()); +} + void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - AMDGPUTargetStreamer *TS = - static_cast(OutStreamer->getTargetStreamer()); - - TS->EmitDirectiveHSACodeObjectVersion(2, 1); - const MCSubtargetInfo *STI = TM.getMCSubtargetInfo(); AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits()); - TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, - "AMD", "AMDGPU"); - // Emit runtime metadata. 
- TS->EmitRuntimeMetadata(STI->getFeatureBits(), M); + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1); + getTargetStreamer().EmitDirectiveHSACodeObjectISA( + ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + getTargetStreamer().EmitStartOfRuntimeMetadata(STI->getFeatureBits(), M); +} + +void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + getTargetStreamer().EmitEmitEndOfRuntimeMetadata(); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -140,17 +145,20 @@ getSIProgramInfo(KernelInfo, *MF); EmitAmdKernelCodeT(*MF, KernelInfo); } + + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + getTargetStreamer().EmitKernelRuntimeMetadata(*MF->getFunction()); } void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { - AMDGPUTargetStreamer *TS = - static_cast(OutStreamer->getTargetStreamer()); SmallString<128> SymbolName; getNameWithPrefix(SymbolName, MF->getFunction()), - TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); + getTargetStreamer().EmitAMDGPUSymbolType( + SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); } AsmPrinter::EmitFunctionEntryLabel(); @@ -802,11 +810,8 @@ KernelInfo.DebuggerPrivateSegmentBufferSGPR; } - AMDGPUTargetStreamer *TS = - static_cast(OutStreamer->getTargetStreamer()); - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); - TS->EmitAMDKernelCodeT(header); + getTargetStreamer().EmitAMDKernelCodeT(header); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, Index: lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -36,8 +36,9 @@ #include #include +namespace llvm { namespace AMDGPU { -namespace 
RuntimeMD { +namespace RuntimeMetadata { // Version and revision of runtime metadata const unsigned char MDVersion = 2; @@ -273,18 +274,19 @@ explicit Metadata() = default; // Construct from an YAML string. - explicit Metadata(const std::string &YAML); + explicit Metadata(const std::string &YamlString); // Convert to YAML string. - std::string toYAML(); + std::string toYamlString(); // Convert from YAML string. - static Metadata fromYAML(const std::string &S); + static Metadata fromYamlString(const std::string &YamlString); }; } //end namespace Program -} // end namespace RuntimeMD +} // end namespace RuntimeMetadata } // end namespace AMDGPU +} // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h +++ /dev/null @@ -1,28 +0,0 @@ -//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares functions for generating runtime metadata. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H - -#include - -namespace llvm { -class FeatureBitset; -class Module; - -// Get runtime metadata as YAML string. 
-std::string getRuntimeMDYAMLString(const FeatureBitset &Features, - const Module &M); - -} -#endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ /dev/null @@ -1,453 +0,0 @@ -//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// Generates AMDGPU runtime metadata for YAML mapping. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPURuntimeMetadata.h" -#include "MCTargetDesc/AMDGPURuntimeMD.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/YAMLTraits.h" -#include -#include -#include -#include - -using namespace llvm; -using namespace ::AMDGPU::RuntimeMD; - -static cl::opt -DumpRuntimeMD("amdgpu-dump-rtmd", - cl::desc("Dump AMDGPU runtime metadata")); - -static cl::opt -CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden, - cl::desc("Check AMDGPU runtime metadata YAML parser")); - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) 
-LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata) - -namespace llvm { -namespace yaml { - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, KernelArg::Metadata &A) { - YamlIO.mapRequired(KeyName::ArgSize, A.Size); - YamlIO.mapRequired(KeyName::ArgAlign, A.Align); - YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U); - YamlIO.mapRequired(KeyName::ArgKind, A.Kind); - YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType); - YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string()); - YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string()); - YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL); - YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL); - YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0)); - } - static const bool flow = true; -}; - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, Kernel::Metadata &K) { - YamlIO.mapRequired(KeyName::KernelName, K.Name); - YamlIO.mapOptional(KeyName::Language, K.Language, std::string()); - YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion); - YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize); - YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint); - YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string()); - YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex, - INVALID_KERNEL_INDEX); - YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups, - uint8_t(0)); - YamlIO.mapRequired(KeyName::Args, K.Args); - } - static const bool flow = true; -}; - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, IsaInfo::Metadata &I) { - YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize); - 
YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize); - YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU); - YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU); - YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize, - I.MaxFlatWorkGroupSize); - YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule); - YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs); - YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs, - I.AddressableNumSGPRs); - YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule); - YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs); - YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs, - I.AddressableNumVGPRs); - } - static const bool flow = true; -}; - -template <> struct MappingTraits { - static void mapping(IO &YamlIO, Program::Metadata &Prog) { - YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); - YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo); - YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); - YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); - } - static const bool flow = true; -}; - -} // end namespace yaml -} // end namespace llvm - -// Get a vector of three integer values from MDNode \p Node; -static std::vector getThreeInt32(MDNode *Node) { - assert(Node->getNumOperands() == 3); - std::vector V; - for (const MDOperand &Op : Node->operands()) { - const ConstantInt *CI = mdconst::extract(Op); - V.push_back(CI->getZExtValue()); - } - return V; -} - -static std::string getOCLTypeName(Type *Ty, bool Signed) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return "half"; - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - return "double"; - case Type::IntegerTyID: { - if (!Signed) - return (Twine('u') + getOCLTypeName(Ty, true)).str(); - unsigned BW = Ty->getIntegerBitWidth(); - switch (BW) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - return "int"; - case 64: 
- return "long"; - default: - return (Twine('i') + Twine(BW)).str(); - } - } - case Type::VectorTyID: { - VectorType *VecTy = cast(Ty); - Type *EleTy = VecTy->getElementType(); - unsigned Size = VecTy->getVectorNumElements(); - return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); - } - default: - return "unknown"; - } -} - -static KernelArg::ValueType getRuntimeMDValueType( - Type *Ty, StringRef TypeName) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return KernelArg::F16; - case Type::FloatTyID: - return KernelArg::F32; - case Type::DoubleTyID: - return KernelArg::F64; - case Type::IntegerTyID: { - bool Signed = !TypeName.startswith("u"); - switch (Ty->getIntegerBitWidth()) { - case 8: - return Signed ? KernelArg::I8 : KernelArg::U8; - case 16: - return Signed ? KernelArg::I16 : KernelArg::U16; - case 32: - return Signed ? KernelArg::I32 : KernelArg::U32; - case 64: - return Signed ? KernelArg::I64 : KernelArg::U64; - default: - // Runtime does not recognize other integer types. Report as struct type. 
- return KernelArg::Struct; - } - } - case Type::VectorTyID: - return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); - case Type::PointerTyID: - return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); - default: - return KernelArg::Struct; - } -} - -static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( - AMDGPUAS::AddressSpaces A) { - switch (A) { - case AMDGPUAS::GLOBAL_ADDRESS: - return KernelArg::Global; - case AMDGPUAS::CONSTANT_ADDRESS: - return KernelArg::Constant; - case AMDGPUAS::LOCAL_ADDRESS: - return KernelArg::Local; - case AMDGPUAS::FLAT_ADDRESS: - return KernelArg::Generic; - case AMDGPUAS::REGION_ADDRESS: - return KernelArg::Region; - default: - return KernelArg::Private; - } -} - -static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL, - Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "", - StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "", - StringRef AccQual = "") { - KernelArg::Metadata Arg; - - // Set ArgSize and ArgAlign. - Arg.Size = DL.getTypeAllocSize(T); - Arg.Align = DL.getABITypeAlignment(T); - if (auto PT = dyn_cast(T)) { - auto ET = PT->getElementType(); - if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) - Arg.PointeeAlign = DL.getABITypeAlignment(ET); - } - - // Set ArgTypeName. - Arg.TypeName = TypeName; - - // Set ArgName. - Arg.Name = ArgName; - - // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe. - SmallVector SplitQ; - TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); - - for (StringRef KeyName : SplitQ) { - auto *P = StringSwitch(KeyName) - .Case("volatile", &Arg.IsVolatile) - .Case("restrict", &Arg.IsRestrict) - .Case("const", &Arg.IsConst) - .Case("pipe", &Arg.IsPipe) - .Default(nullptr); - if (P) - *P = 1; - } - - // Set ArgKind. - Arg.Kind = Kind; - - // Set ArgValueType. - Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName); - - // Set ArgAccQual. 
- if (!AccQual.empty()) { - Arg.AccQual = StringSwitch(AccQual) - .Case("read_only", KernelArg::ReadOnly) - .Case("write_only", KernelArg::WriteOnly) - .Case("read_write", KernelArg::ReadWrite) - .Default(KernelArg::AccNone); - } - - // Set ArgAddrQual. - if (auto *PT = dyn_cast(T)) { - Arg.AddrQual = getRuntimeAddrSpace(static_cast( - PT->getAddressSpace())); - } - - return Arg; -} - -static Kernel::Metadata getRuntimeMDForKernel(const Function &F) { - Kernel::Metadata Kernel; - Kernel.Name = F.getName(); - auto &M = *F.getParent(); - - // Set Language and LanguageVersion. - if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { - if (MD->getNumOperands() != 0) { - auto Node = MD->getOperand(0); - if (Node->getNumOperands() > 1) { - Kernel.Language = "OpenCL C"; - uint16_t Major = mdconst::extract(Node->getOperand(0)) - ->getZExtValue(); - uint16_t Minor = mdconst::extract(Node->getOperand(1)) - ->getZExtValue(); - Kernel.LanguageVersion.push_back(Major); - Kernel.LanguageVersion.push_back(Minor); - } - } - } - - const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &Arg : F.args()) { - unsigned I = Arg.getArgNo(); - Type *T = Arg.getType(); - auto TypeName = dyn_cast(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - auto BaseTypeName = cast(F.getMetadata( - "kernel_arg_base_type")->getOperand(I))->getString(); - StringRef ArgName; - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) - ArgName = cast(ArgNameMD->getOperand(I))->getString(); - auto TypeQual = cast(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - auto AccQual = cast(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - KernelArg::Kind Kind; - if (TypeQual.find("pipe") != StringRef::npos) - Kind = KernelArg::Pipe; - else Kind = StringSwitch(BaseTypeName) - .Case("sampler_t", KernelArg::Sampler) - .Case("queue_t", KernelArg::Queue) - .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", - "image2d_t" , 
"image2d_array_t", KernelArg::Image) - .Cases("image2d_depth_t", "image2d_array_depth_t", - "image2d_msaa_t", "image2d_array_msaa_t", - "image2d_msaa_depth_t", KernelArg::Image) - .Cases("image2d_array_msaa_depth_t", "image3d_t", - KernelArg::Image) - .Default(isa(T) ? - (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? - KernelArg::DynamicSharedPointer : - KernelArg::GlobalBuffer) : - KernelArg::ByValue); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind, - BaseTypeName, TypeName, ArgName, TypeQual, AccQual)); - } - - // Emit hidden kernel arguments for OpenCL kernels. - if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { - auto Int64T = Type::getInt64Ty(F.getContext()); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetX)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetY)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetZ)); - if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { - auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), - KernelArg::Global); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT, - KernelArg::HiddenPrintfBuffer)); - } - } - - // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint. 
- if (auto RWGS = F.getMetadata("reqd_work_group_size")) - Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS); - - if (auto WGSH = F.getMetadata("work_group_size_hint")) - Kernel.WorkGroupSizeHint = getThreeInt32(WGSH); - - if (auto VTH = F.getMetadata("vec_type_hint")) - Kernel.VecTypeHint = getOCLTypeName(cast( - VTH->getOperand(0))->getType(), mdconst::extract( - VTH->getOperand(1))->getZExtValue()); - - return Kernel; -} - -Program::Metadata::Metadata(const std::string &YAML) { - yaml::Input Input(YAML); - Input >> *this; -} - -std::string Program::Metadata::toYAML() { - std::string Text; - raw_string_ostream Stream(Text); - yaml::Output Output(Stream, nullptr, - std::numeric_limits::max() /* do not wrap line */); - Output << *this; - return Stream.str(); -} - -Program::Metadata Program::Metadata::fromYAML(const std::string &S) { - return Program::Metadata(S); -} - -// Check if the YAML string can be parsed. -static void checkRuntimeMDYAMLString(const std::string &YAML) { - auto P = Program::Metadata::fromYAML(YAML); - auto S = P.toYAML(); - errs() << "AMDGPU runtime metadata parser test " - << (YAML == S ? 
"passes" : "fails") << ".\n"; - if (YAML != S) { - errs() << "First output: " << YAML << '\n' - << "Second output: " << S << '\n'; - } -} - -std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features, - const Module &M) { - Program::Metadata Prog; - Prog.MDVersionSeq.push_back(MDVersion); - Prog.MDVersionSeq.push_back(MDRevision); - - IsaInfo::Metadata &IIM = Prog.IsaInfo; - IIM.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features); - IIM.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features); - IIM.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features); - IIM.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features); - IIM.MaxFlatWorkGroupSize = AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features); - IIM.SGPRAllocGranule = AMDGPU::IsaInfo::getSGPRAllocGranule(Features); - IIM.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features); - IIM.AddressableNumSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(Features); - IIM.VGPRAllocGranule = AMDGPU::IsaInfo::getVGPRAllocGranule(Features); - IIM.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features); - IIM.AddressableNumVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(Features); - - // Set PrintfInfo. - if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { - for (unsigned I = 0; I < MD->getNumOperands(); ++I) { - auto Node = MD->getOperand(I); - if (Node->getNumOperands() > 0) - Prog.PrintfInfo.push_back(cast(Node->getOperand(0)) - ->getString()); - } - } - - // Set Kernels. 
- for (auto &F: M.functions()) { - if (!F.getMetadata("kernel_arg_type")) - continue; - Prog.Kernels.emplace_back(getRuntimeMDForKernel(F)); - } - - auto YAML = Prog.toYAML(); - - if (DumpRuntimeMD) - errs() << "AMDGPU runtime metadata:\n" << YAML << '\n'; - - if (CheckRuntimeMDParser) - checkRuntimeMDYAMLString(YAML); - - return YAML; -} Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.h =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.h @@ -0,0 +1,85 @@ +//===--- AMDGPURuntimeMetadataStreamer.h - Streams Runtime Metadata -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMETADATASTREAMER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMETADATASTREAMER_H + +#include "AMDGPURuntimeMetadata.h" +#include "llvm/ADT/StringRef.h" +#include +#include +#include + +namespace llvm { + +class Argument; +class DataLayout; +class FeatureBitset; +class Function; +class MDNode; +class Module; +class Type; + +namespace AMDGPU { +namespace RuntimeMetadata { + +class Streamer final { +private: + typedef KernelArg::AddressSpaceQualifer AddressSpaceQualifer; + typedef KernelArg::ValueType ValueType; + typedef std::vector WorkGroupDimensions; + + Program::Metadata Program; + + void dump(const std::string &YamlString) const; + + void verify(const std::string &YamlString) const; + + AddressSpaceQualifer getAddressSpaceQualifer(unsigned AddressSpace) const; + + std::string getTypeName(Type *Ty, bool Signed) const; + + ValueType getValueType(Type *Ty, StringRef TypeName) const; + + WorkGroupDimensions getWorkGroupDimensions(MDNode *Node) const; + + void streamVersionMetadata(); + + void 
streamIsaInfoMetadata(const FeatureBitset &Features); + + void streamPrintfInfoMetadata(const Module &Mod); + + void streamHighLevelKernelMetadata(const Function &Func); + + void streamKernelArgMetadata(const Argument &Arg); + + void streamKernelArgMetadata(const DataLayout &DL, Type *Ty, + KernelArg::Kind Kind, + StringRef BaseTypeName = "", + StringRef TypeName = "", StringRef ArgName = "", + StringRef AccQual = "", StringRef TypeQual = ""); + +public: + Streamer() = default; + ~Streamer() = default; + + void streamBegin(const FeatureBitset &Features, const Module &Mod); + + void streamEnd() {} + + void streamKernelMetadata(const Function &Func); + + std::string toYamlString(); +}; + +} // end namespace RuntimeMetadata +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMETADATASTREAMER_H Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.cpp =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMetadataStreamer.cpp @@ -0,0 +1,468 @@ +//===--- AMDGPURuntimeMetadataStreamer.cpp - Streams Runtime Metadata -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPURuntimeMetadataStreamer.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" +#include + +using namespace llvm::AMDGPU::RuntimeMetadata; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { +namespace { + +cl::opt DumpRuntimeMetadata( + "amdgpu-rtmd-dump", + cl::desc("Dump AMDGPU Runtime Metadata")); +cl::opt VerifyRuntimeMetadata( + "amdgpu-rtmd-verify", cl::Hidden, + cl::desc("Verify AMDGPU Runtime Metadata")); + +} // end namespace anonymous + +namespace yaml { + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, KernelArg::Metadata &A) { + YamlIO.mapRequired(KeyName::ArgSize, A.Size); + YamlIO.mapRequired(KeyName::ArgAlign, A.Align); + YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U); + YamlIO.mapRequired(KeyName::ArgKind, A.Kind); + YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType); + YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string()); + YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string()); + YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL); + YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, 
INVALID_ACC_QUAL); + YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0)); + YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0)); + YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0)); + YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0)); + } + static const bool flow = true; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, Kernel::Metadata &K) { + YamlIO.mapRequired(KeyName::KernelName, K.Name); + YamlIO.mapOptional(KeyName::Language, K.Language, std::string()); + YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion); + YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize); + YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint); + YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string()); + YamlIO.mapOptional( + KeyName::KernelIndex, K.KernelIndex, INVALID_KERNEL_INDEX); + YamlIO.mapOptional( + KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups, uint8_t(0)); + YamlIO.mapRequired(KeyName::Args, K.Args); + } + static const bool flow = true; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, IsaInfo::Metadata &I) { + YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize); + YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize); + YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU); + YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU); + YamlIO.mapRequired( + KeyName::IsaInfoMaxFlatWorkGroupSize, I.MaxFlatWorkGroupSize); + YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule); + YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs); + YamlIO.mapRequired( + KeyName::IsaInfoAddressableNumSGPRs, I.AddressableNumSGPRs); + YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule); + YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs); + YamlIO.mapRequired( + KeyName::IsaInfoAddressableNumVGPRs, 
I.AddressableNumVGPRs); + } + static const bool flow = true; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, Program::Metadata &Prog) { + YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); + YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo); + YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); + YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); + } + static const bool flow = true; +}; + +} // end namespace yaml + + +namespace AMDGPU { +namespace RuntimeMetadata { + +Program::Metadata::Metadata(const std::string &YamlString) { + yaml::Input Input(YamlString); + Input >> *this; +} + +std::string Program::Metadata::toYamlString() { + std::string Text; + raw_string_ostream Stream(Text); + yaml::Output Output(Stream, nullptr, std::numeric_limits::max()); + Output << *this; + return Stream.str(); +} + +Program::Metadata Program::Metadata::fromYamlString( + const std::string &YamlString) { + return Program::Metadata(YamlString); +} + +void Streamer::dump(const std::string &YamlString) const { + errs() << "AMDGPU Runtime Metadata:\n" << YamlString << '\n'; +} + +void Streamer::verify(const std::string &YamlString) const { + auto FromYamlString = Program::Metadata::fromYamlString(YamlString); + auto ToYamlString = FromYamlString.toYamlString(); + + errs() << "AMDGPU Runtime Metadata Parser Test: " + << (YamlString == ToYamlString ? 
"PASS" : "FAIL") << '\n';
+  if (YamlString != ToYamlString)
+    errs() << "Original input: " << YamlString << '\n'
+           << "Produced output: " << ToYamlString << '\n';
+}
+
+/// \brief Maps an LLVM address space number to the address space qualifier
+/// recorded in the runtime metadata.
+///
+/// Any address space without an explicit case below is reported as
+/// KernelArg::Private.
+Streamer::AddressSpaceQualifer Streamer::getAddressSpaceQualifer(
+    unsigned AddressSpace) const {
+  switch (AddressSpace) {
+  case AMDGPUAS::GLOBAL_ADDRESS:
+    return KernelArg::Global;
+  case AMDGPUAS::CONSTANT_ADDRESS:
+    return KernelArg::Constant;
+  case AMDGPUAS::LOCAL_ADDRESS:
+    return KernelArg::Local;
+  case AMDGPUAS::FLAT_ADDRESS:
+    return KernelArg::Generic;
+  case AMDGPUAS::REGION_ADDRESS:
+    return KernelArg::Region;
+  default:
+    return KernelArg::Private;
+  }
+}
+
+/// \brief Builds a textual type name for \p Ty.
+///
+/// Integers of width 8/16/32/64 become "char"/"short"/"int"/"long", any
+/// other width becomes "i<width>"; an unsigned integer is the signed name
+/// prefixed with 'u'. Vectors are the element name followed by the element
+/// count. Types that are not integer, half, float, double or vector yield
+/// "unknown".
+///
+/// \param Signed Whether integer (element) types are named as signed.
+std::string Streamer::getTypeName(Type *Ty, bool Signed) const {
+  switch (Ty->getTypeID()) {
+  case Type::IntegerTyID: {
+    if (!Signed)
+      return (Twine('u') + getTypeName(Ty, true)).str();
+
+    unsigned BitWidth = Ty->getIntegerBitWidth();
+    switch (BitWidth) {
+    case 8:
+      return "char";
+    case 16:
+      return "short";
+    case 32:
+      return "int";
+    case 64:
+      return "long";
+    default:
+      return (Twine('i') + Twine(BitWidth)).str();
+    }
+  }
+  case Type::HalfTyID:
+    return "half";
+  case Type::FloatTyID:
+    return "float";
+  case Type::DoubleTyID:
+    return "double";
+  case Type::VectorTyID: {
+    VectorType *VecTy = cast<VectorType>(Ty);
+    Type *ElTy = VecTy->getElementType();
+    unsigned NumElements = VecTy->getVectorNumElements();
+    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+  }
+  default:
+    return "unknown";
+  }
+}
+
+/// \brief Classifies \p Ty as a runtime-metadata value type.
+///
+/// Integer signedness is not part of the IR type, so it is taken from
+/// \p TypeName: a name starting with "u" selects the unsigned variant.
+/// Vectors and pointers are classified by their element / pointee type.
+/// Integer widths other than 8/16/32/64 and all remaining types are
+/// reported as KernelArg::Struct.
+Streamer::ValueType Streamer::getValueType(Type *Ty, StringRef TypeName) const {
+  switch (Ty->getTypeID()) {
+  case Type::IntegerTyID: {
+    bool Signed = !TypeName.startswith("u");
+    switch (Ty->getIntegerBitWidth()) {
+    case 8:
+      return Signed ? KernelArg::I8 : KernelArg::U8;
+    case 16:
+      return Signed ? KernelArg::I16 : KernelArg::U16;
+    case 32:
+      return Signed ? KernelArg::I32 : KernelArg::U32;
+    case 64:
+      return Signed ? KernelArg::I64 : KernelArg::U64;
+    default:
+      return KernelArg::Struct;
+    }
+  }
+  case Type::HalfTyID:
+    return KernelArg::F16;
+  case Type::FloatTyID:
+    return KernelArg::F32;
+  case Type::DoubleTyID:
+    return KernelArg::F64;
+  case Type::VectorTyID:
+    return getValueType(Ty->getVectorElementType(), TypeName);
+  case Type::PointerTyID:
+    return getValueType(Ty->getPointerElementType(), TypeName);
+  default:
+    return KernelArg::Struct;
+  }
+}
+
+/// \brief Reads three work-group dimensions from the operands of \p Node.
+///
+/// \returns The three operand values in operand order, or an empty
+/// container when \p Node does not have exactly three operands.
+Streamer::WorkGroupDimensions Streamer::getWorkGroupDimensions(
+    MDNode *Node) const {
+  Streamer::WorkGroupDimensions Dim;
+  if (Node->getNumOperands() != 3)
+    return Dim;
+
+  for (auto &Op : Node->operands())
+    Dim.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
+  return Dim;
+}
+
+/// \brief Appends MDVersion followed by MDRevision to the program's
+/// metadata version sequence.
+void Streamer::streamVersionMetadata() {
+  Program.MDVersionSeq.push_back(MDVersion);
+  Program.MDVersionSeq.push_back(MDRevision);
+}
+
+/// \brief Fills the program's ISA info record from the AMDGPU::IsaInfo
+/// queries for \p Features.
+void Streamer::streamIsaInfoMetadata(const FeatureBitset &Features) {
+  auto &II = Program.IsaInfo;
+  II.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features);
+  II.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features);
+  II.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features);
+  II.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features);
+  II.MaxFlatWorkGroupSize = AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features);
+  II.SGPRAllocGranule = AMDGPU::IsaInfo::getSGPRAllocGranule(Features);
+  II.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features);
+  II.AddressableNumSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(Features);
+  II.VGPRAllocGranule = AMDGPU::IsaInfo::getVGPRAllocGranule(Features);
+  II.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features);
+  II.AddressableNumVGPRs = AMDGPU::IsaInfo::getAddressableNumVGPRs(Features);
+}
+
+/// \brief Collects the printf format strings of \p Mod.
+///
+/// For every operand of the "llvm.printf.fmts" named metadata that has at
+/// least one operand, the string held in its first operand is appended to
+/// the program's printf info. Does nothing when the named metadata is absent.
+void Streamer::streamPrintfInfoMetadata(const Module &Mod) {
+  auto &PI = Program.PrintfInfo;
+  if (auto Node = Mod.getNamedMetadata("llvm.printf.fmts"))
+    for (auto Op : Node->operands())
+      if (Op->getNumOperands())
+        PI.push_back(cast<MDString>(Op->getOperand(0))->getString());
+}
+
+/// \brief Fills the most recently added kernel record from \p Func.
+///
+/// Records the kernel name, the OpenCL language version (when the module
+/// carries "opencl.ocl.version"), the work-group size attributes and the
+/// vector type hint, then one argument record per explicit argument. For
+/// OpenCL modules three hidden 64-bit global-offset arguments are appended,
+/// plus a hidden printf buffer pointer when "llvm.printf.fmts" is present.
+///
+/// Must be called after a kernel record has been pushed onto
+/// Program.Kernels, since it writes to Program.Kernels.back().
+void Streamer::streamHighLevelKernelMetadata(const Function &Func) {
+  auto &K = Program.Kernels.back();
+
+  K.Name = Func.getName();
+
+  if (auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
+    if (Node->getNumOperands()) {
+      auto Op0 = Node->getOperand(0);
+      if (Op0->getNumOperands() > 1) {
+        K.Language = "OpenCL C";
+        K.LanguageVersion.push_back( /* Major */
+          mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
+        K.LanguageVersion.push_back( /* Minor */
+          mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
+      }
+    }
+  } else {
+    // TODO: What about other languages?
+  }
+
+  if (auto Node = Func.getMetadata("reqd_work_group_size"))
+    K.ReqdWorkGroupSize = getWorkGroupDimensions(Node);
+  if (auto Node = Func.getMetadata("vec_type_hint"))
+    K.VecTypeHint = getTypeName(
+        cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+        mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
+  if (auto Node = Func.getMetadata("work_group_size_hint"))
+    K.WorkGroupSizeHint = getWorkGroupDimensions(Node);
+
+  for (auto &Arg : Func.args()) {
+    streamKernelArgMetadata(Arg);
+  }
+
+  auto &DL = Func.getParent()->getDataLayout();
+  if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
+    auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+    streamKernelArgMetadata(DL, Int64Ty, KernelArg::HiddenGlobalOffsetX);
+    streamKernelArgMetadata(DL, Int64Ty, KernelArg::HiddenGlobalOffsetY);
+    streamKernelArgMetadata(DL, Int64Ty, KernelArg::HiddenGlobalOffsetZ);
+
+    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) {
+      // The second argument is an LLVM address space number, so use the
+      // AMDGPUAS constant rather than a KernelArg metadata qualifier.
+      auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
+                                          AMDGPUAS::GLOBAL_ADDRESS);
+      streamKernelArgMetadata(DL, Int8PtrTy, KernelArg::HiddenPrintfBuffer);
+    }
+  } else {
+    // TODO: What about other languages?
+  }
+}
+
+/// \brief Appends an argument record for the explicit kernel argument
+/// \p Arg.
+///
+/// The base type name, type name, access qualifier, type qualifier and
+/// argument name are read from the function's "kernel_arg_*" metadata at
+/// the argument's index; each stays empty when the metadata is missing or
+/// has too few operands.
+///
+/// The argument kind is chosen as follows: a type qualifier containing
+/// "pipe" gives Pipe; otherwise the base type name selects Queue, Sampler
+/// or Image; otherwise a pointer is DynamicSharedPointer when it points to
+/// the local address space and GlobalBuffer if not, and every non-pointer
+/// is ByValue.
+void Streamer::streamKernelArgMetadata(const Argument &Arg) {
+  auto &Func = *Arg.getParent();
+  unsigned ArgNo = Arg.getArgNo();
+  const MDNode *Node;
+
+  StringRef BaseTypeName;
+  Node = Func.getMetadata("kernel_arg_base_type");
+  if (Node && ArgNo < Node->getNumOperands())
+    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+  StringRef TypeName;
+  Node = Func.getMetadata("kernel_arg_type");
+  if (Node && ArgNo < Node->getNumOperands())
+    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+  StringRef AccQual;
+  Node = Func.getMetadata("kernel_arg_access_qual");
+  if (Node && ArgNo < Node->getNumOperands())
+    AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+  StringRef TypeQual;
+  Node = Func.getMetadata("kernel_arg_type_qual");
+  if (Node && ArgNo < Node->getNumOperands())
+    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+  KernelArg::Kind Kind;
+  if (TypeQual.find("pipe") != StringRef::npos)
+    Kind = KernelArg::Pipe;
+  else
+    // Every image type name must be its own string literal: adjacent
+    // literals without a separating comma are concatenated by the compiler
+    // into a single name that no argument ever has. The names are spread
+    // over several Cases() calls to keep the arity of each call small.
+    Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
+      .Case("queue_t", KernelArg::Queue)
+      .Case("sampler_t", KernelArg::Sampler)
+      .Cases("image1d_t",
+             "image1d_array_t",
+             "image1d_buffer_t", KernelArg::Image)
+      .Cases("image2d_t",
+             "image2d_depth_t",
+             "image2d_msaa_t",
+             "image2d_msaa_depth_t", KernelArg::Image)
+      .Cases("image2d_array_t",
+             "image2d_array_depth_t",
+             "image2d_array_msaa_t",
+             "image2d_array_msaa_depth_t", KernelArg::Image)
+      .Case("image3d_t", KernelArg::Image)
+      .Default(isa<PointerType>(Arg.getType()) ?
+                   (Arg.getType()->getPointerAddressSpace() ==
+                    AMDGPUAS::LOCAL_ADDRESS ?
+                    KernelArg::DynamicSharedPointer :
+                    KernelArg::GlobalBuffer) :
+                   KernelArg::ByValue);
+
+  StringRef ArgName;
+  Node = Func.getMetadata("kernel_arg_name");
+  if (Node && ArgNo < Node->getNumOperands())
+    ArgName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+  streamKernelArgMetadata(
+      Func.getParent()->getDataLayout(), Arg.getType(), Kind, BaseTypeName,
+      TypeName, ArgName, AccQual, TypeQual);
+}
+
+/// \brief Appends an argument record to the most recently added kernel.
+///
+/// Size and alignment come from \p DL for \p Ty; the value type is derived
+/// from \p Ty and \p BaseTypeName. For pointer types the address space
+/// qualifier is recorded, and the pointee alignment as well when the
+/// pointer is in the local address space and the pointee type is sized.
+/// A non-empty \p AccQual is translated to an access qualifier (unknown
+/// text maps to KernelArg::AccNone). \p TypeQual is split on spaces and the
+/// words "volatile", "const", "restrict" and "pipe" set the matching flag;
+/// other words are ignored.
+void Streamer::streamKernelArgMetadata(const DataLayout &DL, Type *Ty,
+                                       KernelArg::Kind Kind,
+                                       StringRef BaseTypeName,
+                                       StringRef TypeName, StringRef ArgName,
+                                       StringRef AccQual, StringRef TypeQual) {
+  Program.Kernels.back().Args.push_back(KernelArg::Metadata());
+  auto &A = Program.Kernels.back().Args.back();
+
+  A.Size = DL.getTypeAllocSize(Ty);
+  A.Align = DL.getABITypeAlignment(Ty);
+  A.Kind = Kind;
+  A.ValueType = getValueType(Ty, BaseTypeName);
+  A.TypeName = TypeName;
+  A.Name = ArgName;
+
+  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+    auto ElTy = PtrTy->getElementType();
+    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized())
+      A.PointeeAlign = DL.getABITypeAlignment(ElTy);
+
+    A.AddrQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
+  }
+
+  // The switch result types below are taken from the members they feed;
+  // the record's declaration lives in a header that is not part of this
+  // file.
+  if (!AccQual.empty())
+    A.AccQual = StringSwitch<decltype(A.AccQual)>(AccQual)
+      .Case("read_only", KernelArg::ReadOnly)
+      .Case("write_only", KernelArg::WriteOnly)
+      .Case("read_write", KernelArg::ReadWrite)
+      .Default(KernelArg::AccNone);
+
+  SmallVector<StringRef, 4> SplitTypeQuals;
+  TypeQual.split(SplitTypeQuals, " ", -1, /* Drop empty entry */ false);
+  for (StringRef KeyName : SplitTypeQuals) {
+    auto *P = StringSwitch<decltype(&A.IsVolatile)>(KeyName)
+      .Case("volatile", &A.IsVolatile)
+      .Case("const", &A.IsConst)
+      .Case("restrict", &A.IsRestrict)
+      .Case("pipe", &A.IsPipe)
+      .Default(nullptr);
+    if (P)
+      *P = 1;
+  }
+}
+
+/// \brief Records the module-level metadata: metadata version, ISA info
+/// for \p Features and the printf format strings of \p Mod.
+void Streamer::streamBegin(const FeatureBitset &Features, const Module &Mod) {
+  streamVersionMetadata();
+  streamIsaInfoMetadata(Features);
+  streamPrintfInfoMetadata(Mod);
+}
+
+/// \brief Adds a kernel record for \p Func and fills it in.
+///
+/// Functions whose calling convention is not AMDGPU_KERNEL are ignored.
+void Streamer::streamKernelMetadata(const Function &Func) {
+  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+    return;
+
+  Program.Kernels.push_back(Kernel::Metadata());
+  streamHighLevelKernelMetadata(Func);
+}
+
+/// \brief Serializes the collected program metadata to a YAML string.
+///
+/// When DumpRuntimeMetadata is set the string is passed to dump(), and
+/// when VerifyRuntimeMetadata is set it is passed to verify(), before it
+/// is returned.
+std::string Streamer::toYamlString() {
+  auto YamlString = Program.toYamlString();
+
+  if (DumpRuntimeMetadata)
+    dump(YamlString);
+  if (VerifyRuntimeMetadata)
+    verify(YamlString);
+
+  return YamlString;
+}
+
+} // end namespace RuntimeMetadata
+} // end namespace AMDGPU
+} // end namespace llvm
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H +#include "AMDGPURuntimeMetadataStreamer.h" #include "AMDKernelCodeT.h" #include "llvm/MC/MCStreamer.h" @@ -27,6 +28,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: + AMDGPU::RuntimeMetadata::Streamer RuntimeMetadataStreamer; MCContext &getContext() const { return Streamer.getContext(); } public: @@ -47,13 +49,17 @@ virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) = 0; + virtual void EmitStartOfRuntimeMetadata(const FeatureBitset &Features, + const Module &Mod); - virtual void EmitRuntimeMetadata(StringRef Metadata) = 0; + virtual void EmitKernelRuntimeMetadata(const Function &Func); + + virtual void EmitEmitEndOfRuntimeMetadata(); + + virtual void EmitRuntimeMetadata(StringRef YamlString) = 0; }; -class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { +class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { formatted_raw_ostream &OS; public: AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); @@ -72,13
+78,10 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) override; - - void EmitRuntimeMetadata(StringRef Metadata) override; + void EmitRuntimeMetadata(StringRef YamlString) override; }; -class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { +class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; void EmitAMDGPUNote(const MCExpr *DescSize, AMDGPU::PT_NOTE::NoteType Type, @@ -104,10 +107,7 @@ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) override; - - void EmitRuntimeMetadata(StringRef Metadata) override; + void EmitRuntimeMetadata(StringRef YamlString) override; }; } Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,7 +27,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" -#include "AMDGPURuntimeMD.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -39,6 +38,21 @@ AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} +void AMDGPUTargetStreamer::EmitStartOfRuntimeMetadata( + const FeatureBitset &Features, + const Module &Mod) { + RuntimeMetadataStreamer.streamBegin(Features, Mod); +} + +void AMDGPUTargetStreamer::EmitKernelRuntimeMetadata(const Function &Func) { + RuntimeMetadataStreamer.streamKernelMetadata(Func); +} + +void AMDGPUTargetStreamer::EmitEmitEndOfRuntimeMetadata() { + RuntimeMetadataStreamer.streamEnd(); + EmitRuntimeMetadata(RuntimeMetadataStreamer.toYamlString()); +} + //===----------------------------------------------------------------------===// // AMDGPUTargetAsmStreamer 
//===----------------------------------------------------------------------===// @@ -93,16 +107,9 @@ OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; } -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) { - OS << "\t.amdgpu_runtime_metadata\n"; - OS << getRuntimeMDYAMLString(Features, M); - OS << "\n\t.end_amdgpu_runtime_metadata\n"; -} - -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) { +void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef YamlString) { OS << "\t.amdgpu_runtime_metadata"; - OS << Metadata; + OS << YamlString; OS << "\t.end_amdgpu_runtime_metadata\n"; } @@ -216,7 +223,7 @@ Symbol->setBinding(ELF::STB_GLOBAL); } -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) { +void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef YamlString) { // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. 
auto &Context = getContext(); @@ -231,13 +238,8 @@ PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, [&](MCELFStreamer &OS) { OS.EmitLabel(DescBegin); - OS.EmitBytes(Metadata); + OS.EmitBytes(YamlString); OS.EmitLabel(DescEnd); } ); } - -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features, - const Module &M) { - EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M)); -} Index: lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -6,7 +6,7 @@ AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp AMDGPUMCAsmInfo.cpp - AMDGPURuntimeMD.cpp + AMDGPURuntimeMetadataStreamer.cpp AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp SIMCCodeEmitter.cpp Index: test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- test/CodeGen/AMDGPU/runtime-metadata.ll +++ test/CodeGen/AMDGPU/runtime-metadata.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI -; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-rtmd-dump -amdgpu-rtmd-verify %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -341,7 +341,7 @@ ; CHECK-NEXT:... -; PARSER: AMDGPU runtime metadata parser test passes. 
+; PARSER: AMDGPU Runtime Metadata Parser Test: PASS ; NOTES: Displaying notes found at file offset 0x{{[0-9]+}} ; NOTES-NEXT: Owner Data size Description