Index: llvm/trunk/include/llvm/Support/AMDGPUMetadata.h =================================================================== --- llvm/trunk/include/llvm/Support/AMDGPUMetadata.h +++ llvm/trunk/include/llvm/Support/AMDGPUMetadata.h @@ -457,6 +457,19 @@ /// PAL metadata keys. enum Key : uint32_t { + R_2E12_COMPUTE_PGM_RSRC1 = 0x2e12, + R_2D4A_SPI_SHADER_PGM_RSRC1_LS = 0x2d4a, + R_2D0A_SPI_SHADER_PGM_RSRC1_HS = 0x2d0a, + R_2CCA_SPI_SHADER_PGM_RSRC1_ES = 0x2cca, + R_2C8A_SPI_SHADER_PGM_RSRC1_GS = 0x2c8a, + R_2C4A_SPI_SHADER_PGM_RSRC1_VS = 0x2c4a, + R_2C0A_SPI_SHADER_PGM_RSRC1_PS = 0x2c0a, + R_2E00_COMPUTE_DISPATCH_INITIATOR = 0x2e00, + R_A1B3_SPI_PS_INPUT_ENA = 0xa1b3, + R_A1B4_SPI_PS_INPUT_ADDR = 0xa1b4, + R_A1B6_SPI_PS_IN_CONTROL = 0xa1b6, + R_A2D5_VGT_SHADER_STAGES_EN = 0xa2d5, + LS_NUM_USED_VGPRS = 0x10000021, HS_NUM_USED_VGPRS = 0x10000022, ES_NUM_USED_VGPRS = 0x10000023, @@ -482,12 +495,6 @@ CS_SCRATCH_SIZE = 0x1000004a }; -/// PAL metadata represented as a vector. -typedef std::vector Metadata; - -/// Converts \p PALMetadata to \p String. -std::error_code toString(const Metadata &PALMetadata, std::string &String); - } // end namespace PALMD } // end namespace AMDGPU } // end namespace llvm Index: llvm/trunk/lib/Support/AMDGPUMetadata.cpp =================================================================== --- llvm/trunk/lib/Support/AMDGPUMetadata.cpp +++ llvm/trunk/lib/Support/AMDGPUMetadata.cpp @@ -218,19 +218,5 @@ } } // end namespace HSAMD - -namespace PALMD { - -std::error_code toString(const Metadata &PALMetadata, std::string &String) { - raw_string_ostream Stream(String); - for (auto I = PALMetadata.begin(), E = PALMetadata.end(); I != E; ++I) { - Stream << Twine(I == PALMetadata.begin() ? 
" 0x" : ",0x"); - Stream << Twine::utohexstr(*I); - } - Stream.flush(); - return std::error_code(); -} - -} // end namespace PALMD } // end namespace AMDGPU } // end namespace llvm Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -56,12 +56,10 @@ DenseMap CallGraphResourceInfo; std::unique_ptr HSAMetadataStream; - std::map PALMetadataMap; uint64_t getFunctionCodeSize(const MachineFunction &MF) const; SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const; - void readPALMetadata(Module &M); void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -136,7 +136,7 @@ HSAMetadataStream->begin(M); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) - readPALMetadata(M); + getTargetStreamer()->getPALMetadata()->readFromIR(M); if (IsaInfo::hasCodeObjectV3(getGlobalSTI())) return; @@ -171,20 +171,6 @@ (void)Success; assert(Success && "Malformed HSA Metadata"); } - - if (!IsaInfo::hasCodeObjectV3(getGlobalSTI())) { - // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). - if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { - // Copy the PAL metadata from the map where we collected it into a vector, - // then write it as a .note. 
- PALMD::Metadata PALMetadataVector; - for (auto i : PALMetadataMap) { - PALMetadataVector.push_back(i.first); - PALMetadataVector.push_back(i.second); - } - getTargetStreamer()->EmitPALMetadata(PALMetadataVector); - } - } } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -312,27 +298,6 @@ return AsmPrinter::doFinalization(M); } -// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the -// frontend into our PALMetadataMap, ready for per-function modification. It -// is a NamedMD containing an MDTuple containing a number of MDNodes each of -// which is an integer value, and each two integer values forms a key=value -// pair that we store as PALMetadataMap[key]=value in the map. -void AMDGPUAsmPrinter::readPALMetadata(Module &M) { - auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); - if (!NamedMD || !NamedMD->getNumOperands()) - return; - auto Tuple = dyn_cast(NamedMD->getOperand(0)); - if (!Tuple) - return; - for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) { - auto Key = mdconst::dyn_extract(Tuple->getOperand(I)); - auto Val = mdconst::dyn_extract(Tuple->getOperand(I + 1)); - if (!Key || !Val) - continue; - PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue(); - } -} - // Print comments that apply to both callable functions and entry points. void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, @@ -1048,70 +1013,32 @@ // This is the equivalent of EmitProgramInfoSI above, but for when the OS type // is AMDPAL. It stores each compute/SPI register setting and other PAL -// metadata items into the PALMetadataMap, combining with any provided by the -// frontend as LLVM metadata. Once all functions are written, PALMetadataMap is -// then written as a single block in the .note section. +// metadata items into the AMDGPUPALMetadata, combining with any provided by the +// frontend as LLVM metadata.
Once all functions are written, the PAL metadata +// is then written as a single block in the .note section. void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) { const SIMachineFunctionInfo *MFI = MF.getInfo(); - // Given the calling convention, calculate the register number for rsrc1. In - // principle the register number could change in future hardware, but we know - // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so - // we can use the same fixed value that .AMDGPU.config has for Mesa. Note - // that we use a register number rather than a byte offset, so we need to - // divide by 4. - unsigned Rsrc1Reg = getRsrcReg(MF.getFunction().getCallingConv()) / 4; - unsigned Rsrc2Reg = Rsrc1Reg + 1; - // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used - // with a constant offset to access any non-register shader-specific PAL - // metadata key. - unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE; - switch (MF.getFunction().getCallingConv()) { - case CallingConv::AMDGPU_PS: - ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE; - break; - case CallingConv::AMDGPU_VS: - ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE; - break; - case CallingConv::AMDGPU_GS: - ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE; - break; - case CallingConv::AMDGPU_ES: - ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE; - break; - case CallingConv::AMDGPU_HS: - ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE; - break; - case CallingConv::AMDGPU_LS: - ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE; - break; - } - unsigned NumUsedVgprsKey = ScratchSizeKey + - PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE; - unsigned NumUsedSgprsKey = ScratchSizeKey + - PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE; - PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU; - PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU; + auto CC = 
MF.getFunction().getCallingConv(); + auto MD = getTargetStreamer()->getPALMetadata(); + + MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU); + MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU); if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { - PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1; - PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2; - // ScratchSize is in bytes, 16 aligned. - PALMetadataMap[ScratchSizeKey] |= - alignTo(CurrentProgramInfo.ScratchSize, 16); + MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1); + MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2); } else { - PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | - S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks); + MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | + S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks)); if (CurrentProgramInfo.ScratchBlocks > 0) - PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1); - // ScratchSize is in bytes, 16 aligned. - PALMetadataMap[ScratchSizeKey] |= - alignTo(CurrentProgramInfo.ScratchSize, 16); + MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1)); } + // ScratchSize is in bytes, 16 aligned. 
+ MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16)); if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { - PALMetadataMap[Rsrc2Reg] |= - S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); - PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable(); - PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr(); + MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks)); + MD->setSpiPsInputEna(MFI->getPSInputEnable()); + MD->setSpiPsInputAddr(MFI->getPSInputAddr()); } } Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3349,19 +3349,27 @@ "not available on non-amdpal OSes")).str()); } - PALMD::Metadata PALMetadata; + auto PALMetadata = getTargetStreamer().getPALMetadata(); for (;;) { - uint32_t Value; + uint32_t Key, Value; + if (ParseAsAbsoluteExpression(Key)) { + return TokError(Twine("invalid value in ") + + Twine(PALMD::AssemblerDirective)); + } + if (getLexer().isNot(AsmToken::Comma)) { + return TokError(Twine("expected an even number of values in ") + + Twine(PALMD::AssemblerDirective)); + } + Lex(); if (ParseAsAbsoluteExpression(Value)) { return TokError(Twine("invalid value in ") + Twine(PALMD::AssemblerDirective)); } - PALMetadata.push_back(Value); + PALMetadata->setRegister(Key, Value); if (getLexer().isNot(AsmToken::Comma)) break; Lex(); } - getTargetStreamer().EmitPALMetadata(PALMetadata); return false; } Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -10,6 +10,7 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H 
#include "AMDKernelCodeT.h" +#include "Utils/AMDGPUPALMetadata.h" #include "llvm/BinaryFormat/MsgPackDocument.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -28,12 +29,16 @@ class Type; class AMDGPUTargetStreamer : public MCTargetStreamer { + AMDGPUPALMetadata PALMetadata; + protected: MCContext &getContext() const { return Streamer.getContext(); } public: AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; } + virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0; virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, @@ -69,9 +74,6 @@ /// \returns True on success, false on failure. virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0; - /// \returns True on success, false on failure. - virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0; - virtual void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, @@ -87,6 +89,8 @@ public: AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + void finish() override; + void EmitDirectiveAMDGCNTarget(StringRef Target) override; void EmitDirectiveHSACodeObjectVersion(uint32_t Major, @@ -109,9 +113,6 @@ /// \returns True on success, false on failure. bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; - /// \returns True on success, false on failure. - bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; - void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, @@ -130,6 +131,8 @@ MCELFStreamer &getStreamer(); + void finish() override; + void EmitDirectiveAMDGCNTarget(StringRef Target) override; void EmitDirectiveHSACodeObjectVersion(uint32_t Major, @@ -152,9 +155,6 @@ /// \returns True on success, false on failure. 
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; - /// \returns True on success, false on failure. - bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; - void EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -153,6 +153,14 @@ formatted_raw_ostream &OS) : AMDGPUTargetStreamer(S), OS(OS) { } +// A hook for emitting stuff at the end. +// We use it for emitting the accumulated PAL metadata as directives. +void AMDGPUTargetAsmStreamer::finish() { + std::string S; + getPALMetadata()->toString(S); + OS << S; +} + void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) { OS << "\t.amdgcn_target \"" << Target << "\"\n"; } @@ -225,16 +233,6 @@ return true; } -bool AMDGPUTargetAsmStreamer::EmitPALMetadata( - const PALMD::Metadata &PALMetadata) { - std::string PALMetadataString; - if (PALMD::toString(PALMetadata, PALMetadataString)) - return false; - - OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n'; - return true; -} - void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, @@ -382,6 +380,19 @@ return static_cast(Streamer); } +// A hook for emitting stuff at the end. +// We use it for emitting the accumulated PAL metadata as a .note record. 
+void AMDGPUTargetELFStreamer::finish() { + std::string Blob; + unsigned Type = ELF::NT_AMD_AMDGPU_PAL_METADATA; + getPALMetadata()->toBlob(Type, Blob); + if (Blob.empty()) + return; + EmitNote(ElfNote::NoteNameV2, + MCConstantExpr::create(Blob.size(), getContext()), Type, + [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); }); +} + void AMDGPUTargetELFStreamer::EmitNote( StringRef Name, const MCExpr *DescSZ, unsigned NoteType, function_ref EmitDesc) { @@ -528,18 +539,6 @@ return true; } -bool AMDGPUTargetELFStreamer::EmitPALMetadata( - const PALMD::Metadata &PALMetadata) { - EmitNote(ElfNote::NoteNameV2, - MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), - getContext()), - ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) { - for (auto I : PALMetadata) - OS.EmitIntValue(I, sizeof(uint32_t)); - }); - return true; -} - void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -0,0 +1,87 @@ +//===-- AMDGPUPALMetadata.h - PAL metadata handling -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// PAL metadata handling +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H + +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { + +class AMDGPUTargetStreamer; +class formatted_raw_ostream; +class MCStreamer; +class Module; + +class AMDGPUPALMetadata { + std::map Registers; + +public: + // Read the amdgpu.pal.metadata supplied by the frontend, ready for + // per-function modification. + void readFromIR(Module &M); + + // Set PAL metadata from a binary blob from the applicable .note record. + // Returns false if bad format. Blob must remain valid for the lifetime of + // the Metadata. + bool setFromBlob(unsigned Type, StringRef Blob); + + // Set the rsrc1 register in the metadata for a particular shader stage. + // In fact this ORs the value into any previous setting of the register. + void setRsrc1(unsigned CC, unsigned Val); + + // Set the rsrc2 register in the metadata for a particular shader stage. + // In fact this ORs the value into any previous setting of the register. + void setRsrc2(unsigned CC, unsigned Val); + + // Set the SPI_PS_INPUT_ENA register in the metadata. + // In fact this ORs the value into any previous setting of the register. + void setSpiPsInputEna(unsigned Val); + + // Set the SPI_PS_INPUT_ADDR register in the metadata. + // In fact this ORs the value into any previous setting of the register. + void setSpiPsInputAddr(unsigned Val); + + // Get a register from the metadata, or 0 if not currently set. + unsigned getRegister(unsigned Reg); + + // Set a register in the metadata. + // In fact this ORs the value into any previous setting of the register. 
+ void setRegister(unsigned Reg, unsigned Val); + + // Set the number of used vgprs in the metadata. This is an optional advisory + // record for logging etc; wave dispatch actually uses the rsrc1 register for + // the shader stage to determine the number of vgprs to allocate. + void setNumUsedVgprs(unsigned CC, unsigned Val); + + // Set the number of used sgprs in the metadata. This is an optional advisory + // record for logging etc; wave dispatch actually uses the rsrc1 register for + // the shader stage to determine the number of sgprs to allocate. + void setNumUsedSgprs(unsigned CC, unsigned Val); + + // Set the scratch size in the metadata, overwriting any previous value. + void setScratchSize(unsigned CC, unsigned Val); + + // Emit the accumulated PAL metadata as an asm directive. + // This is called from AMDGPUTargetAsmStreamer::finish(). + void toString(std::string &S); + + // Emit the accumulated PAL metadata as a binary blob. + // This is called from AMDGPUTargetELFStreamer::finish(). + void toBlob(unsigned Type, std::string &S); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -0,0 +1,196 @@ +//===-- AMDGPUPALMetadata.cpp - Accumulate and print AMDGPU PAL metadata -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// This class has methods called by AMDGPUAsmPrinter to accumulate and print +/// the PAL metadata.
+// +//===----------------------------------------------------------------------===// +// + +#include "AMDGPUPALMetadata.h" +#include "AMDGPU.h" +#include "AMDGPUAsmPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" +#include "SIDefines.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/Support/AMDGPUMetadata.h" +#include "llvm/Support/EndianStream.h" + +using namespace llvm; +using namespace llvm::AMDGPU; + +// Read the amdgpu.pal.metadata supplied by the +// frontend into our Registers, ready for per-function modification. It +// is a NamedMD containing an MDTuple containing a number of MDNodes each of +// which is an integer value, and each two integer values forms a key=value +// pair that we store as Registers[key]=value in the map. +void AMDGPUPALMetadata::readFromIR(Module &M) { + auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); + if (!NamedMD || !NamedMD->getNumOperands()) + return; + auto Tuple = dyn_cast(NamedMD->getOperand(0)); + if (!Tuple) + return; + for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) { + auto Key = mdconst::dyn_extract(Tuple->getOperand(I)); + auto Val = mdconst::dyn_extract(Tuple->getOperand(I + 1)); + if (!Key || !Val) + continue; + Registers[Key->getZExtValue()] = Val->getZExtValue(); + } +} + +// Set PAL metadata from a binary blob from the applicable .note record. +// Returns false if bad format. Blob must remain valid for the lifetime of the +// Metadata. +bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) { + assert(Type == ELF::NT_AMD_AMDGPU_PAL_METADATA); + auto Data = reinterpret_cast(Blob.data()); + for (unsigned I = 0; I != Blob.size() / sizeof(uint32_t) / 2; ++I) + setRegister(Data[I * 2], Data[I * 2 + 1]); + return true; +} + +// Given the calling convention, calculate the register number for rsrc1. 
In +// principle the register number could change in future hardware, but we know +// it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so +// we can use fixed values. +static unsigned getRsrc1Reg(CallingConv::ID CC) { + switch (CC) { + default: + return PALMD::R_2E12_COMPUTE_PGM_RSRC1; + case CallingConv::AMDGPU_LS: + return PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS; + case CallingConv::AMDGPU_HS: + return PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS; + case CallingConv::AMDGPU_ES: + return PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES; + case CallingConv::AMDGPU_GS: + return PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS; + case CallingConv::AMDGPU_VS: + return PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS; + case CallingConv::AMDGPU_PS: + return PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS; + } +} + +// Calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used +// with a constant offset to access any non-register shader-specific PAL +// metadata key. +static unsigned getScratchSizeKey(CallingConv::ID CC) { + switch (CC) { + case CallingConv::AMDGPU_PS: + return PALMD::Key::PS_SCRATCH_SIZE; + case CallingConv::AMDGPU_VS: + return PALMD::Key::VS_SCRATCH_SIZE; + case CallingConv::AMDGPU_GS: + return PALMD::Key::GS_SCRATCH_SIZE; + case CallingConv::AMDGPU_ES: + return PALMD::Key::ES_SCRATCH_SIZE; + case CallingConv::AMDGPU_HS: + return PALMD::Key::HS_SCRATCH_SIZE; + case CallingConv::AMDGPU_LS: + return PALMD::Key::LS_SCRATCH_SIZE; + default: + return PALMD::Key::CS_SCRATCH_SIZE; + } +} + +// Set the rsrc1 register in the metadata for a particular shader stage. +// In fact this ORs the value into any previous setting of the register. +void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) { + setRegister(getRsrc1Reg(CC), Val); +} + +// Set the rsrc2 register in the metadata for a particular shader stage. +// In fact this ORs the value into any previous setting of the register. 
+void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) { + setRegister(getRsrc1Reg(CC) + 1, Val); +} + +// Set the SPI_PS_INPUT_ENA register in the metadata. +// In fact this ORs the value into any previous setting of the register. +void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) { + setRegister(PALMD::R_A1B3_SPI_PS_INPUT_ENA, Val); +} + +// Set the SPI_PS_INPUT_ADDR register in the metadata. +// In fact this ORs the value into any previous setting of the register. +void AMDGPUPALMetadata::setSpiPsInputAddr(unsigned Val) { + setRegister(PALMD::R_A1B4_SPI_PS_INPUT_ADDR, Val); +} + +// Get a register from the metadata, or 0 if not currently set. +unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) { return Registers[Reg]; } + +// Set a register in the metadata. +// In fact this ORs the value into any previous setting of the register. +void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) { + Registers[Reg] |= Val; +} + +// Set the number of used vgprs in the metadata. This is an optional advisory +// record for logging etc; wave dispatch actually uses the rsrc1 register for +// the shader stage to determine the number of vgprs to allocate. +void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) { + unsigned NumUsedVgprsKey = getScratchSizeKey(CC) + + PALMD::Key::VS_NUM_USED_VGPRS - + PALMD::Key::VS_SCRATCH_SIZE; + Registers[NumUsedVgprsKey] = Val; +} + +// Set the number of used sgprs in the metadata. This is an optional advisory +// record for logging etc; wave dispatch actually uses the rsrc1 register for +// the shader stage to determine the number of sgprs to allocate. +void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) { + unsigned NumUsedSgprsKey = getScratchSizeKey(CC) + + PALMD::Key::VS_NUM_USED_SGPRS - + PALMD::Key::VS_SCRATCH_SIZE; + Registers[NumUsedSgprsKey] = Val; +} + +// Set the scratch size in the metadata. 
+void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) { + Registers[getScratchSizeKey(CC)] = Val; +} + +// Convert the accumulated PAL metadata into an asm directive. +void AMDGPUPALMetadata::toString(std::string &String) { + String.clear(); + if (Registers.empty()) + return; + raw_string_ostream Stream(String); + Stream << '\t' << AMDGPU::PALMD::AssemblerDirective << ' '; + for (auto I = Registers.begin(), E = Registers.end(); I != E; ++I) { + if (I != Registers.begin()) + Stream << ','; + Stream << "0x" << Twine::utohexstr(I->first) << ",0x" + << Twine::utohexstr(I->second); + } + Stream << '\n'; +} + +// Convert the accumulated PAL metadata into a binary blob for writing as +// a .note record of the specified AMD type. +void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) { + Blob.clear(); + if (Type != ELF::NT_AMD_AMDGPU_PAL_METADATA) + return; + if (Registers.empty()) + return; + raw_string_ostream OS(Blob); + support::endian::Writer EW(OS, support::endianness::little); + for (auto I : Registers) { + EW.write(uint32_t(I.first)); + EW.write(uint32_t(I.second)); + } +} + Index: llvm/trunk/lib/Target/AMDGPU/Utils/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/CMakeLists.txt +++ llvm/trunk/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -2,4 +2,5 @@ AMDGPUBaseInfo.cpp AMDKernelCodeTUtils.cpp AMDGPUAsmUtils.cpp + AMDGPUPALMetadata.cpp ) Index: llvm/trunk/test/CodeGen/AMDGPU/elf-notes.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/elf-notes.ll +++ llvm/trunk/test/CodeGen/AMDGPU/elf-notes.ll @@ -65,7 +65,8 @@ ; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx802 ; OSABI-PAL-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) ; OSABI-PAL-ELF: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) -; OSABI-PAL-ELF: PAL Metadata: +; TODO: readobj can no longer dump PAL metadata pending resolution of D52821 +; OSABI-PAL-ELF-XXX: PAL 
Metadata: ; TODO: Following check line fails on mips: ; OSABI-PAL-ELF-XXX: 0x2e12,0xac02c0,0x2e13,0x80,0x1000001b,0x1,0x10000022,0x60,0x1000003e,0x0 Index: llvm/trunk/tools/llvm-readobj/ELFDumper.cpp =================================================================== --- llvm/trunk/tools/llvm-readobj/ELFDumper.cpp +++ llvm/trunk/tools/llvm-readobj/ELFDumper.cpp @@ -3893,17 +3893,6 @@ return {"ISA Version", std::string(reinterpret_cast(Desc.data()), Desc.size())}; - case ELF::NT_AMD_AMDGPU_PAL_METADATA: - const uint32_t *PALMetadataBegin = - reinterpret_cast(Desc.data()); - const uint32_t *PALMetadataEnd = PALMetadataBegin + Desc.size(); - std::vector PALMetadata(PALMetadataBegin, PALMetadataEnd); - std::string PALMetadataString; - auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString); - if (Error) { - return {"PAL Metadata", "Invalid"}; - } - return {"PAL Metadata", PALMetadataString}; } }