Index: include/llvm/BinaryFormat/ELF.h
===================================================================
--- include/llvm/BinaryFormat/ELF.h
+++ include/llvm/BinaryFormat/ELF.h
@@ -1365,7 +1365,8 @@
   // Note types with values between 0 and 9 (inclusive) are reserved.
   NT_AMD_AMDGPU_HSA_METADATA = 10,
   NT_AMD_AMDGPU_ISA = 11,
-  NT_AMD_AMDGPU_PAL_METADATA = 12
+  NT_AMD_AMDGPU_PAL_METADATA = 12,
+  NT_AMD_AMDGPU_PAL_METADATA_MSGPACK = 13
 };
 
 // AMDGPU specific notes. (Code Object V3)
Index: include/llvm/Support/AMDGPUMetadata.h
===================================================================
--- include/llvm/Support/AMDGPUMetadata.h
+++ include/llvm/Support/AMDGPUMetadata.h
@@ -452,9 +452,15 @@
 //===----------------------------------------------------------------------===//
 
 namespace PALMD {
 
-/// PAL metadata assembler directive.
+/// PAL metadata (old linear format) assembler directive.
 constexpr char AssemblerDirective[] = ".amd_amdgpu_pal_metadata";
 
+/// PAL metadata (new MsgPack format) beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amdgpu_pal_metadata";
+
+/// PAL metadata (new MsgPack format) ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_pal_metadata";
+
 /// PAL metadata keys.
 enum Key : uint32_t {
   R_2E12_COMPUTE_PGM_RSRC1 = 0x2e12,
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1026,6 +1026,7 @@
   auto CC = MF.getFunction().getCallingConv();
   auto MD = getTargetStreamer()->getPALMetadata();
 
+  MD->setEntryPoint(CC, MF.getFunction().getName());
   MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
   MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
   if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -875,8 +875,15 @@
 
   bool ParseDirectiveISAVersion();
   bool ParseDirectiveHSAMetadata();
+  bool ParseDirectivePALMetadataBegin();
   bool ParseDirectivePALMetadata();
 
+  /// Common code to parse out a block of text (typically YAML) between start and
+  /// end directives.
+  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
+                           const char *AssemblerDirectiveEnd,
+                           std::string &CollectString);
+
   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                              RegisterKind RegKind, unsigned Reg1,
                              unsigned RegNum);
@@ -3119,14 +3126,35 @@
   }
 
   std::string HSAMetadataString;
-  raw_string_ostream YamlStream(HSAMetadataString);
+  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
+                          HSAMetadataString))
+    return true;
+
+  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+  } else {
+    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+  }
+
+  return false;
+}
+
+/// Common code to parse out a block of text (typically YAML) between start and
+/// end directives.
+bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
+                                          const char *AssemblerDirectiveEnd,
+                                          std::string &CollectString) {
+
+  raw_string_ostream CollectStream(CollectString);
 
   getLexer().setSkipSpace(false);
 
   bool FoundEnd = false;
   while (!getLexer().is(AsmToken::Eof)) {
     while (getLexer().is(AsmToken::Space)) {
-      YamlStream << getLexer().getTok().getString();
+      CollectStream << getLexer().getTok().getString();
       Lex();
     }
 
@@ -3139,8 +3167,8 @@
       }
     }
 
-    YamlStream << Parser.parseStringToEndOfStatement()
-               << getContext().getAsmInfo()->getSeparatorString();
+    CollectStream << Parser.parseStringToEndOfStatement()
+                  << getContext().getAsmInfo()->getSeparatorString();
 
     Parser.eatToEndOfStatement();
   }
@@ -3149,22 +3177,35 @@
 
   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
     return TokError(Twine("expected directive ") +
-                    Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
+                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
   }
 
-  YamlStream.flush();
+  CollectStream.flush();
+  return false;
+}
 
-  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
-    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
-      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
-  } else {
-    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
-      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
-  }
+/// Parse the assembler directive for new MsgPack-format PAL metadata.
+bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
+  // Same OS gate as the old linear-format directive parsed below.
+  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
+    return Error(getParser().getTok().getLoc(),
+                 (Twine(AMDGPU::PALMD::AssemblerDirectiveBegin) +
+                  Twine(" directive is not available on non-amdpal OSes"))
+                     .str());
+  }
+
+  std::string String;
+  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
+                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
+    return true;
 
+  auto PALMetadata = getTargetStreamer().getPALMetadata();
+  if (!PALMetadata->setFromString(String))
+    return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
   return false;
 }
 
+/// Parse the assembler directive for old linear-format PAL metadata.
 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
     return Error(getParser().getTok().getLoc(),
@@ -3229,6 +3270,9 @@
     return ParseDirectiveHSAMetadata();
   }
 
+  if (IDVal == PALMD::AssemblerDirectiveBegin)
+    return ParseDirectivePALMetadataBegin();
+
   if (IDVal == PALMD::AssemblerDirective)
     return ParseDirectivePALMetadata();
 
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -384,7 +384,9 @@
 // We use it for emitting the accumulated PAL metadata as a .note record.
void AMDGPUTargetELFStreamer::finish() { std::string Blob; - unsigned Type = ELF::NT_AMD_AMDGPU_PAL_METADATA; + unsigned Type = getPALMetadata()->getType(); + if (!Type) + Type = ELF::NT_AMD_AMDGPU_PAL_METADATA; getPALMetadata()->toBlob(Type, Blob); if (Blob.empty()) return; Index: lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MsgPackDocument.h" #include namespace llvm { @@ -25,7 +26,10 @@ class Module; class AMDGPUPALMetadata { - std::map Registers; + unsigned BlobType = 0; + msgpack::Document MsgPackDoc; + msgpack::DocNode Registers; + msgpack::DocNode HwStages; public: // Read the amdgpu.pal.metadata supplied by the frontend, ready for @@ -60,6 +64,9 @@ // In fact this ORs the value into any previous setting of the register. void setRegister(unsigned Reg, unsigned Val); + // Set the entry point name for one shader. + void setEntryPoint(unsigned CC, StringRef Name); + // Set the number of used vgprs in the metadata. This is an optional advisory // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of vgprs to allocate. @@ -73,13 +80,40 @@ // Set the scratch size in the metadata. void setScratchSize(unsigned CC, unsigned Val); - // Emit the accumulated PAL metadata as an asm directive. + // Emit the accumulated PAL metadata as asm directives. // This is called from AMDGPUTargetAsmStreamer::Finish(). void toString(std::string &S); + // Set PAL metadata from YAML text. + bool setFromString(StringRef S); + + // Get .note record type of metadata blob to be emitted: + // 0 = not set, or ELF::NT_AMD_AMDGPU_PAL_METADATA or + // ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK. 
+ unsigned getType() const { return BlobType; } + // Emit the accumulated PAL metadata as a binary blob. // This is called from AMDGPUTargetELFStreamer::Finish(). void toBlob(unsigned Type, std::string &S); + + // Get the msgpack::Document for the PAL metadata. + msgpack::Document *getMsgPackDoc() { return &MsgPackDoc; } + +private: + // Reference (create if necessary) the node for the registers map. + msgpack::DocNode &refRegisters(); + + // Get (create if necessary) the registers map. + msgpack::MapDocNode getRegisters(); + + // Get (create if necessary) the .hardware_stages entry for the given calling + // convention. + msgpack::MapDocNode getHwStage(unsigned CC); + + bool setFromLegacyBlob(StringRef Blob); + bool setFromMsgPackBlob(StringRef Blob); + void toLegacyBlob(std::string &Blob); + void toMsgPackBlob(std::string &Blob); }; } // end namespace llvm Index: lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -26,15 +26,27 @@ using namespace llvm; using namespace llvm::AMDGPU; -// Read the amdgpu.pal.metadata supplied by the -// frontend into our Registers, ready for per-function modification. It -// is a NamedMD containing an MDTuple containing a number of MDNodes each of -// which is an integer value, and each two integer values forms a key=value -// pair that we store as Registers[key]=value in the map. +// Read the PAL metadata from IR metadata, where it was put by the frontend. void AMDGPUPALMetadata::readFromIR(Module &M) { - auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); + auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata.msgpack"); + if (NamedMD && NamedMD->getNumOperands()) { + // This is the new msgpack format for metadata. It is a NamedMD containing + // an MDTuple containing an MDString containing the msgpack data. 
+ BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK; + auto MDN = dyn_cast(NamedMD->getOperand(0)); + if (MDN && MDN->getNumOperands()) + if (auto MDS = dyn_cast(MDN->getOperand(0))) + setFromMsgPackBlob(MDS->getString()); + return; + } + BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA; + NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); if (!NamedMD || !NamedMD->getNumOperands()) return; + // This is the old reg=value pair format for metadata. It is a NamedMD + // containing an MDTuple containing a number of MDNodes each of which is an + // integer value, and each two integer values forms a key=value pair that we + // store as Registers[key]=value in the map. auto Tuple = dyn_cast(NamedMD->getOperand(0)); if (!Tuple) return; @@ -43,7 +55,7 @@ auto Val = mdconst::dyn_extract(Tuple->getOperand(I + 1)); if (!Key || !Val) continue; - Registers[Key->getZExtValue()] = Val->getZExtValue(); + setRegister(Key->getZExtValue(), Val->getZExtValue()); } } @@ -51,13 +63,31 @@ // Returns false if bad format. Blob must remain valid for the lifetime of the // Metadata. bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) { - assert(Type == ELF::NT_AMD_AMDGPU_PAL_METADATA); + BlobType = Type; + switch (Type) { + case ELF::NT_AMD_AMDGPU_PAL_METADATA: + return setFromLegacyBlob(Blob); + case ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK: + return setFromMsgPackBlob(Blob); + default: + return false; + } +} + +// Set PAL metadata from legacy (array of key=value pairs) blob. +bool AMDGPUPALMetadata::setFromLegacyBlob(StringRef Blob) { auto Data = reinterpret_cast(Blob.data()); for (unsigned I = 0; I != Blob.size() / sizeof(uint32_t) / 2; ++I) setRegister(Data[I * 2], Data[I * 2 + 1]); return true; } +// Set PAL metadata from msgpack blob. +bool AMDGPUPALMetadata::setFromMsgPackBlob(StringRef Blob) { + msgpack::Reader Reader(Blob); + return MsgPackDoc.readFromBlob(Blob, /*Multi=*/false); +} + // Given the calling convention, calculate the register number for rsrc1. 
In // principle the register number could change in future hardware, but we know // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so @@ -128,71 +158,527 @@ } // Get a register from the metadata, or 0 if not currently set. -unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) { return Registers[Reg]; } +unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) { + auto Regs = getRegisters(); + auto It = Regs.find(MsgPackDoc.getNode(Reg)); + if (It == Regs.end()) + return 0; + auto N = It->second; + if (N.getKind() != msgpack::Type::UInt) + return 0; + return N.getUInt(); +} // Set a register in the metadata. // In fact this ORs the value into any previous setting of the register. void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) { - Registers[Reg] |= Val; + if (BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK) { + // In the new MsgPack format, ignore register numbered >= 0x10000000. It + // is a PAL ABI pseudo-register in the old non-MsgPack format. + if (Reg >= 0x10000000) + return; + } + auto &N = getRegisters()[MsgPackDoc.getNode(Reg)]; + if (N.getKind() == msgpack::Type::UInt) + Val |= N.getUInt(); + N = N.getDocument()->getNode(Val); } -// Set the number of used vgprs in the metadata. This is an optional advisory -// record for logging etc; wave dispatch actually uses the rsrc1 register for -// the shader stage to determine the number of vgprs to allocate. +// Set the entry point name for one shader. +void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) { + if (BlobType != ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK) + return; + // Msgpack format. + getHwStage(CC)[".entry_point"] = MsgPackDoc.getNode(Name, /*Copy=*/true); +} + +// Set the number of used vgprs in the metadata. This is an optional +// advisory record for logging etc; wave dispatch actually uses the rsrc1 +// register for the shader stage to determine the number of vgprs to +// allocate. 
 void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
-  unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
-                             PALMD::Key::VS_NUM_USED_VGPRS -
-                             PALMD::Key::VS_SCRATCH_SIZE;
-  Registers[NumUsedVgprsKey] = Val;
+  if (BlobType != ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK) {
+    // Old non-msgpack format: key is offset from the stage's scratch-size key.
+    unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
+                               PALMD::Key::VS_NUM_USED_VGPRS -
+                               PALMD::Key::VS_SCRATCH_SIZE;
+    setRegister(NumUsedVgprsKey, Val); // NOTE(review): ORs into any prior value; the replaced code assigned -- confirm.
+    return;
+  }
+  // Msgpack format: stored as ".vgpr_count" in the stage's getHwStage() map.
+  getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
 }
 
 // Set the number of used sgprs in the metadata. This is an optional advisory
 // record for logging etc; wave dispatch actually uses the rsrc1 register for
 // the shader stage to determine the number of sgprs to allocate.
 void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) {
-  unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
-                             PALMD::Key::VS_NUM_USED_SGPRS -
-                             PALMD::Key::VS_SCRATCH_SIZE;
-  Registers[NumUsedSgprsKey] = Val;
+  if (BlobType != ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK) {
+    // Old non-msgpack format: key is offset from the stage's scratch-size key.
+    unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
+                               PALMD::Key::VS_NUM_USED_SGPRS -
+                               PALMD::Key::VS_SCRATCH_SIZE;
+    setRegister(NumUsedSgprsKey, Val); // NOTE(review): ORs into any prior value; the replaced code assigned -- confirm.
+    return;
+  }
+  // Msgpack format: stored as ".sgpr_count" in the stage's getHwStage() map.
+  getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
 }
 
 // Set the scratch size in the metadata.
 void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
-  Registers[getScratchSizeKey(CC)] = Val;
+  if (BlobType != ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK) {
+    // Old non-msgpack format: stored under the stage's scratch-size key.
+    setRegister(getScratchSizeKey(CC), Val); // NOTE(review): ORs into any prior value; the replaced code assigned -- confirm.
+    return;
+  }
+  // Msgpack format: stored as ".scratch_memory_size" in the stage's map.
+  getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
+}
+
+// Convert a register number to name, for display by toString().
+// Returns nullptr if none.
+static const char *getRegisterName(unsigned RegNum) {
+  // Table of registers, terminated by a {0, nullptr} sentinel entry.
+  static const struct RegInfo {
+    unsigned Num;
+    const char *Name;
+  } RegInfoTable[] = {
+      // Registers that code generation sets/modifies metadata for.
+      {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS, "SPI_SHADER_PGM_RSRC1_VS"},
+      {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS + 1, "SPI_SHADER_PGM_RSRC2_VS"},
+      {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS, "SPI_SHADER_PGM_RSRC1_LS"},
+      {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS + 1, "SPI_SHADER_PGM_RSRC2_LS"},
+      {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS, "SPI_SHADER_PGM_RSRC1_HS"},
+      {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS + 1, "SPI_SHADER_PGM_RSRC2_HS"},
+      {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES, "SPI_SHADER_PGM_RSRC1_ES"},
+      {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES + 1, "SPI_SHADER_PGM_RSRC2_ES"},
+      {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS, "SPI_SHADER_PGM_RSRC1_GS"},
+      {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS + 1, "SPI_SHADER_PGM_RSRC2_GS"},
+      {PALMD::R_2E12_COMPUTE_PGM_RSRC1, "COMPUTE_PGM_RSRC1"},
+      {PALMD::R_2E12_COMPUTE_PGM_RSRC1 + 1, "COMPUTE_PGM_RSRC2"},
+      {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS, "SPI_SHADER_PGM_RSRC1_PS"},
+      {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS + 1, "SPI_SHADER_PGM_RSRC2_PS"},
+      {PALMD::R_A1B3_SPI_PS_INPUT_ENA, "SPI_PS_INPUT_ENA"},
+      {PALMD::R_A1B4_SPI_PS_INPUT_ADDR, "SPI_PS_INPUT_ADDR"},
+
+      // Registers not known to code generation.
+      {0x2c07, "SPI_SHADER_PGM_RSRC3_PS"},
+      {0x2c46, "SPI_SHADER_PGM_RSRC3_VS"},
+      {0x2c87, "SPI_SHADER_PGM_RSRC3_GS"},
+      {0x2cc7, "SPI_SHADER_PGM_RSRC3_ES"},
+      {0x2d07, "SPI_SHADER_PGM_RSRC3_HS"},
+      {0x2d47, "SPI_SHADER_PGM_RSRC3_LS"},
+
+      {0xa1c3, "SPI_SHADER_POS_FORMAT"},
+      {0xa1b1, "SPI_VS_OUT_CONFIG"},
+      {0xa207, "PA_CL_VS_OUT_CNTL"},
+      {0xa204, "PA_CL_CLIP_CNTL"},
+      {0xa206, "PA_CL_VTE_CNTL"},
+      {0xa2f9, "PA_SU_VTX_CNTL"},
+      {0xa293, "PA_SC_MODE_CNTL_1"},
+      {0xa2a1, "VGT_PRIMITIVEID_EN"},
+      {0x2c81, "SPI_SHADER_PGM_RSRC4_GS"},
+      {0x2e18, "COMPUTE_TMPRING_SIZE"},
+      {0xa1b5, "SPI_INTERP_CONTROL_0"},
+      {0xa1ba, "SPI_TMPRING_SIZE"},
+      {0xa1c4, "SPI_SHADER_Z_FORMAT"},
+      {0xa1c5, "SPI_SHADER_COL_FORMAT"},
+      {0xa203, "DB_SHADER_CONTROL"},
+      {0xa08f, "CB_SHADER_MASK"},
+      {0xa1b6, "SPI_PS_IN_CONTROL"},
+      {0xa191, "SPI_PS_INPUT_CNTL_0"},
+      {0xa192, "SPI_PS_INPUT_CNTL_1"},
+      {0xa193, "SPI_PS_INPUT_CNTL_2"},
+      {0xa194, "SPI_PS_INPUT_CNTL_3"},
+      {0xa195, "SPI_PS_INPUT_CNTL_4"},
+      {0xa196, "SPI_PS_INPUT_CNTL_5"},
+      {0xa197, "SPI_PS_INPUT_CNTL_6"},
+      {0xa198, "SPI_PS_INPUT_CNTL_7"},
+      {0xa199, "SPI_PS_INPUT_CNTL_8"},
+      {0xa19a, "SPI_PS_INPUT_CNTL_9"},
+      {0xa19b, "SPI_PS_INPUT_CNTL_10"},
+      {0xa19c, "SPI_PS_INPUT_CNTL_11"},
+      {0xa19d, "SPI_PS_INPUT_CNTL_12"},
+      {0xa19e, "SPI_PS_INPUT_CNTL_13"},
+      {0xa19f, "SPI_PS_INPUT_CNTL_14"},
+      {0xa1a0, "SPI_PS_INPUT_CNTL_15"},
+      {0xa1a1, "SPI_PS_INPUT_CNTL_16"},
+      {0xa1a2, "SPI_PS_INPUT_CNTL_17"},
+      {0xa1a3, "SPI_PS_INPUT_CNTL_18"},
+      {0xa1a4, "SPI_PS_INPUT_CNTL_19"},
+      {0xa1a5, "SPI_PS_INPUT_CNTL_20"},
+      {0xa1a6, "SPI_PS_INPUT_CNTL_21"},
+      {0xa1a7, "SPI_PS_INPUT_CNTL_22"},
+      {0xa1a8, "SPI_PS_INPUT_CNTL_23"},
+      {0xa1a9, "SPI_PS_INPUT_CNTL_24"},
+      {0xa1aa, "SPI_PS_INPUT_CNTL_25"},
+      {0xa1ab, "SPI_PS_INPUT_CNTL_26"},
+      {0xa1ac, "SPI_PS_INPUT_CNTL_27"},
+      {0xa1ad, "SPI_PS_INPUT_CNTL_28"},
+      {0xa1ae, "SPI_PS_INPUT_CNTL_29"},
+      {0xa1af, "SPI_PS_INPUT_CNTL_30"},
+      {0xa1b0, "SPI_PS_INPUT_CNTL_31"},
+
+      {0xa2ce, "VGT_GS_MAX_VERT_OUT"},
+      {0xa2ab, "VGT_ESGS_RING_ITEMSIZE"},
+      {0xa290, "VGT_GS_MODE"},
+      {0xa291, "VGT_GS_ONCHIP_CNTL"},
+      {0xa2d7, "VGT_GS_VERT_ITEMSIZE"},
+      {0xa2d8, "VGT_GS_VERT_ITEMSIZE_1"},
+      {0xa2d9, "VGT_GS_VERT_ITEMSIZE_2"},
+      {0xa2da, "VGT_GS_VERT_ITEMSIZE_3"},
+      {0xa298, "VGT_GSVS_RING_OFFSET_1"},
+      {0xa299, "VGT_GSVS_RING_OFFSET_2"},
+      {0xa29a, "VGT_GSVS_RING_OFFSET_3"},
+
+      {0xa2e4, "VGT_GS_INSTANCE_CNT"},
+      {0xa297, "VGT_GS_PER_VS"},
+      {0xa29b, "VGT_GS_OUT_PRIM_TYPE"},
+      {0xa2ac, "VGT_GSVS_RING_ITEMSIZE"},
+
+      {0xa2d5, "VGT_SHADER_STAGES_EN"},
+      {0xa2ad, "VGT_REUSE_OFF"},
+      {0xa1b8, "SPI_BARYC_CNTL"},
+
+      {0x2c4c, "SPI_SHADER_USER_DATA_VS_0"},
+      {0x2c4d, "SPI_SHADER_USER_DATA_VS_1"},
+      {0x2c4e, "SPI_SHADER_USER_DATA_VS_2"},
+      {0x2c4f, "SPI_SHADER_USER_DATA_VS_3"},
+      {0x2c50, "SPI_SHADER_USER_DATA_VS_4"},
+      {0x2c51, "SPI_SHADER_USER_DATA_VS_5"},
+      {0x2c52, "SPI_SHADER_USER_DATA_VS_6"},
+      {0x2c53, "SPI_SHADER_USER_DATA_VS_7"},
+      {0x2c54, "SPI_SHADER_USER_DATA_VS_8"},
+      {0x2c55, "SPI_SHADER_USER_DATA_VS_9"},
+      {0x2c56, "SPI_SHADER_USER_DATA_VS_10"},
+      {0x2c57, "SPI_SHADER_USER_DATA_VS_11"},
+      {0x2c58, "SPI_SHADER_USER_DATA_VS_12"},
+      {0x2c59, "SPI_SHADER_USER_DATA_VS_13"},
+      {0x2c5a, "SPI_SHADER_USER_DATA_VS_14"},
+      {0x2c5b, "SPI_SHADER_USER_DATA_VS_15"},
+      {0x2c5c, "SPI_SHADER_USER_DATA_VS_16"},
+      {0x2c5d, "SPI_SHADER_USER_DATA_VS_17"},
+      {0x2c5e, "SPI_SHADER_USER_DATA_VS_18"},
+      {0x2c5f, "SPI_SHADER_USER_DATA_VS_19"},
+      {0x2c60, "SPI_SHADER_USER_DATA_VS_20"},
+      {0x2c61, "SPI_SHADER_USER_DATA_VS_21"},
+      {0x2c62, "SPI_SHADER_USER_DATA_VS_22"},
+      {0x2c63, "SPI_SHADER_USER_DATA_VS_23"},
+      {0x2c64, "SPI_SHADER_USER_DATA_VS_24"},
+      {0x2c65, "SPI_SHADER_USER_DATA_VS_25"},
+      {0x2c66, "SPI_SHADER_USER_DATA_VS_26"},
+      {0x2c67, "SPI_SHADER_USER_DATA_VS_27"},
+      {0x2c68, "SPI_SHADER_USER_DATA_VS_28"},
+      {0x2c69, "SPI_SHADER_USER_DATA_VS_29"},
+      {0x2c6a, "SPI_SHADER_USER_DATA_VS_30"},
+      {0x2c6b, "SPI_SHADER_USER_DATA_VS_31"},
+
+      {0x2ccc, "SPI_SHADER_USER_DATA_ES_0"},
+      {0x2ccd, "SPI_SHADER_USER_DATA_ES_1"},
+      {0x2cce, "SPI_SHADER_USER_DATA_ES_2"},
+      {0x2ccf, "SPI_SHADER_USER_DATA_ES_3"},
+      {0x2cd0, "SPI_SHADER_USER_DATA_ES_4"},
+      {0x2cd1, "SPI_SHADER_USER_DATA_ES_5"},
+      {0x2cd2, "SPI_SHADER_USER_DATA_ES_6"},
+      {0x2cd3, "SPI_SHADER_USER_DATA_ES_7"},
+      {0x2cd4, "SPI_SHADER_USER_DATA_ES_8"},
+      {0x2cd5, "SPI_SHADER_USER_DATA_ES_9"},
+      {0x2cd6, "SPI_SHADER_USER_DATA_ES_10"},
+      {0x2cd7, "SPI_SHADER_USER_DATA_ES_11"},
+      {0x2cd8, "SPI_SHADER_USER_DATA_ES_12"},
+      {0x2cd9, "SPI_SHADER_USER_DATA_ES_13"},
+      {0x2cda, "SPI_SHADER_USER_DATA_ES_14"},
+      {0x2cdb, "SPI_SHADER_USER_DATA_ES_15"},
+      {0x2cdc, "SPI_SHADER_USER_DATA_ES_16"},
+      {0x2cdd, "SPI_SHADER_USER_DATA_ES_17"},
+      {0x2cde, "SPI_SHADER_USER_DATA_ES_18"},
+      {0x2cdf, "SPI_SHADER_USER_DATA_ES_19"},
+      {0x2ce0, "SPI_SHADER_USER_DATA_ES_20"},
+      {0x2ce1, "SPI_SHADER_USER_DATA_ES_21"},
+      {0x2ce2, "SPI_SHADER_USER_DATA_ES_22"},
+      {0x2ce3, "SPI_SHADER_USER_DATA_ES_23"},
+      {0x2ce4, "SPI_SHADER_USER_DATA_ES_24"},
+      {0x2ce5, "SPI_SHADER_USER_DATA_ES_25"},
+      {0x2ce6, "SPI_SHADER_USER_DATA_ES_26"},
+      {0x2ce7, "SPI_SHADER_USER_DATA_ES_27"},
+      {0x2ce8, "SPI_SHADER_USER_DATA_ES_28"},
+      {0x2ce9, "SPI_SHADER_USER_DATA_ES_29"},
+      {0x2cea, "SPI_SHADER_USER_DATA_ES_30"},
+      {0x2ceb, "SPI_SHADER_USER_DATA_ES_31"},
+
+      {0x2c0c, "SPI_SHADER_USER_DATA_PS_0"},
+      {0x2c0d, "SPI_SHADER_USER_DATA_PS_1"},
+      {0x2c0e, "SPI_SHADER_USER_DATA_PS_2"},
+      {0x2c0f, "SPI_SHADER_USER_DATA_PS_3"},
+      {0x2c10, "SPI_SHADER_USER_DATA_PS_4"},
+      {0x2c11, "SPI_SHADER_USER_DATA_PS_5"},
+      {0x2c12, "SPI_SHADER_USER_DATA_PS_6"},
+      {0x2c13, "SPI_SHADER_USER_DATA_PS_7"},
+      {0x2c14, "SPI_SHADER_USER_DATA_PS_8"},
+      {0x2c15, "SPI_SHADER_USER_DATA_PS_9"},
+      {0x2c16, "SPI_SHADER_USER_DATA_PS_10"},
+      {0x2c17, "SPI_SHADER_USER_DATA_PS_11"},
+      {0x2c18, "SPI_SHADER_USER_DATA_PS_12"},
+      {0x2c19, "SPI_SHADER_USER_DATA_PS_13"},
+      {0x2c1a, "SPI_SHADER_USER_DATA_PS_14"},
+      {0x2c1b, "SPI_SHADER_USER_DATA_PS_15"},
+      {0x2c1c, "SPI_SHADER_USER_DATA_PS_16"},
+      {0x2c1d, "SPI_SHADER_USER_DATA_PS_17"},
+      {0x2c1e, "SPI_SHADER_USER_DATA_PS_18"},
+      {0x2c1f, "SPI_SHADER_USER_DATA_PS_19"},
+      {0x2c20, "SPI_SHADER_USER_DATA_PS_20"},
+      {0x2c21, "SPI_SHADER_USER_DATA_PS_21"},
+      {0x2c22, "SPI_SHADER_USER_DATA_PS_22"},
+      {0x2c23, "SPI_SHADER_USER_DATA_PS_23"},
+      {0x2c24, "SPI_SHADER_USER_DATA_PS_24"},
+      {0x2c25, "SPI_SHADER_USER_DATA_PS_25"},
+      {0x2c26, "SPI_SHADER_USER_DATA_PS_26"},
+      {0x2c27, "SPI_SHADER_USER_DATA_PS_27"},
+      {0x2c28, "SPI_SHADER_USER_DATA_PS_28"},
+      {0x2c29, "SPI_SHADER_USER_DATA_PS_29"},
+      {0x2c2a, "SPI_SHADER_USER_DATA_PS_30"},
+      {0x2c2b, "SPI_SHADER_USER_DATA_PS_31"},
+
+      {0x2e40, "COMPUTE_USER_DATA_0"},
+      {0x2e41, "COMPUTE_USER_DATA_1"},
+      {0x2e42, "COMPUTE_USER_DATA_2"},
+      {0x2e43, "COMPUTE_USER_DATA_3"},
+      {0x2e44, "COMPUTE_USER_DATA_4"},
+      {0x2e45, "COMPUTE_USER_DATA_5"},
+      {0x2e46, "COMPUTE_USER_DATA_6"},
+      {0x2e47, "COMPUTE_USER_DATA_7"},
+      {0x2e48, "COMPUTE_USER_DATA_8"},
+      {0x2e49, "COMPUTE_USER_DATA_9"},
+      {0x2e4a, "COMPUTE_USER_DATA_10"},
+      {0x2e4b, "COMPUTE_USER_DATA_11"},
+      {0x2e4c, "COMPUTE_USER_DATA_12"},
+      {0x2e4d, "COMPUTE_USER_DATA_13"},
+      {0x2e4e, "COMPUTE_USER_DATA_14"},
+      {0x2e4f, "COMPUTE_USER_DATA_15"},
+
+      {0x2e07, "COMPUTE_NUM_THREAD_X"},
+      {0x2e08, "COMPUTE_NUM_THREAD_Y"},
+      {0x2e09, "COMPUTE_NUM_THREAD_Z"},
+      {0xa2db, "VGT_TF_PARAM"},
+      {0xa2d6, "VGT_LS_HS_CONFIG"},
+      {0xa287, "VGT_HOS_MIN_TESS_LEVEL"},
+      {0xa286, "VGT_HOS_MAX_TESS_LEVEL"},
+      {0xa2f8, "PA_SC_AA_CONFIG"},
+      {0xa310, "PA_SC_SHADER_CONTROL"},
+      {0xa313, "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"},
+
+      {0x2d0c, "SPI_SHADER_USER_DATA_LS_0"},
+      {0x2d0d, "SPI_SHADER_USER_DATA_LS_1"},
+      {0x2d0e, "SPI_SHADER_USER_DATA_LS_2"},
+      {0x2d0f, "SPI_SHADER_USER_DATA_LS_3"},
+      {0x2d10, "SPI_SHADER_USER_DATA_LS_4"},
+      {0x2d11, "SPI_SHADER_USER_DATA_LS_5"},
+      {0x2d12, "SPI_SHADER_USER_DATA_LS_6"},
+      {0x2d13, "SPI_SHADER_USER_DATA_LS_7"},
+      {0x2d14, "SPI_SHADER_USER_DATA_LS_8"},
+      {0x2d15, "SPI_SHADER_USER_DATA_LS_9"},
+      {0x2d16, "SPI_SHADER_USER_DATA_LS_10"},
+      {0x2d17, "SPI_SHADER_USER_DATA_LS_11"},
+      {0x2d18, "SPI_SHADER_USER_DATA_LS_12"},
+      {0x2d19, "SPI_SHADER_USER_DATA_LS_13"},
+      {0x2d1a, "SPI_SHADER_USER_DATA_LS_14"},
+      {0x2d1b, "SPI_SHADER_USER_DATA_LS_15"},
+      {0x2d1c, "SPI_SHADER_USER_DATA_LS_16"},
+      {0x2d1d, "SPI_SHADER_USER_DATA_LS_17"},
+      {0x2d1e, "SPI_SHADER_USER_DATA_LS_18"},
+      {0x2d1f, "SPI_SHADER_USER_DATA_LS_19"},
+      {0x2d20, "SPI_SHADER_USER_DATA_LS_20"},
+      {0x2d21, "SPI_SHADER_USER_DATA_LS_21"},
+      {0x2d22, "SPI_SHADER_USER_DATA_LS_22"},
+      {0x2d23, "SPI_SHADER_USER_DATA_LS_23"},
+      {0x2d24, "SPI_SHADER_USER_DATA_LS_24"},
+      {0x2d25, "SPI_SHADER_USER_DATA_LS_25"},
+      {0x2d26, "SPI_SHADER_USER_DATA_LS_26"},
+      {0x2d27, "SPI_SHADER_USER_DATA_LS_27"},
+      {0x2d28, "SPI_SHADER_USER_DATA_LS_28"},
+      {0x2d29, "SPI_SHADER_USER_DATA_LS_29"},
+      {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"},
+      {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"},
+
+      {0xa2aa, "IA_MULTI_VGT_PARAM"},
+      {0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"},
+      {0xa2e6, "VGT_STRMOUT_BUFFER_CONFIG"},
+      {0xa2e5, "VGT_STRMOUT_CONFIG"},
+      {0xa2b5, "VGT_STRMOUT_VTX_STRIDE_0"},
+      {0xa2b9, "VGT_STRMOUT_VTX_STRIDE_1"},
+      {0xa2bd, "VGT_STRMOUT_VTX_STRIDE_2"},
+      {0xa2c1, "VGT_STRMOUT_VTX_STRIDE_3"},
+      {0xa316, "VGT_VERTEX_REUSE_BLOCK_CNTL"},
+
+      {0, nullptr}};
+  auto Entry = RegInfoTable;
+  for (; Entry->Num && Entry->Num != RegNum; ++Entry) // Linear scan; stops on a match or at the sentinel.
+    ;
+  return Entry->Name; // The sentinel's Name is nullptr, i.e. "no name known".
 }
 
 // Convert the accumulated PAL metadata into an asm directive.
 void AMDGPUPALMetadata::toString(std::string &String) {
   String.clear();
-  if (Registers.empty())
-    return;
   raw_string_ostream Stream(String);
-  Stream << "\t" << AMDGPU::PALMD::AssemblerDirective << " ";
-  for (auto I = Registers.begin(), E = Registers.end(); I != E; ++I) {
-    if (I != Registers.begin())
-      Stream << ",";
-    Stream << "0x" << Twine::utohexstr(I->first) << ",0x"
-           << Twine::utohexstr(I->second);
+  if (BlobType != ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK) {
+    if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil) // Document never populated: emit nothing.
+      return;
+    // Old linear reg=val format: one directive with comma-separated hex pairs.
+    Stream << "\t" << AMDGPU::PALMD::AssemblerDirective << " ";
+    auto Regs = getRegisters();
+    for (auto I = Regs.begin(), E = Regs.end(); I != E; ++I) {
+      if (I != Regs.begin())
+        Stream << ",";
+      unsigned Reg = I->first.getUInt();
+      unsigned Val = I->second.getUInt();
+      Stream << "0x" << Twine::utohexstr(Reg) << ",0x" << Twine::utohexstr(Val);
+    }
+    Stream << "\n";
+    return;
   }
-  Stream << "\n";
+
+  // New msgpack-based format -- output as YAML (with unsigned numbers in hex),
+  // but first change the registers map to use names.
+  MsgPackDoc.setHexMode(); // NOTE(review): not switched back before returning -- confirm intended.
+  auto &RegsObj = refRegisters();
+  auto OrigRegs = RegsObj.getMap();
+  RegsObj = MsgPackDoc.getMapNode();
+  for (auto I : OrigRegs) {
+    auto Key = I.first;
+    if (const char *RegName = getRegisterName(Key.getUInt())) { // NOTE(review): assumes every key is a UInt -- confirm.
+      std::string KeyName = Key.toString();
+      KeyName += " (";
+      KeyName += RegName;
+      KeyName += ")";
+      Key = MsgPackDoc.getNode(KeyName, /*Copy=*/true);
+    }
+    RegsObj.getMap()[Key] = I.second;
+  }
+
+  // Output as YAML, bracketed by the begin/end assembler directives.
+  Stream << "\t" << AMDGPU::PALMD::AssemblerDirectiveBegin << "\n";
+  MsgPackDoc.toYAML(Stream);
+  Stream << "\t" << AMDGPU::PALMD::AssemblerDirectiveEnd << "\n";
+
+  // Restore original registers map, so the named keys exist only in the text.
+  RegsObj = OrigRegs;
 }
 
 // Convert the accumulated PAL metadata into a binary blob for writing as
 // a .note record of the specified AMD type.
void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
+  switch (Type) {
+  case ELF::NT_AMD_AMDGPU_PAL_METADATA:
+    toLegacyBlob(Blob);
+    break;
+  case ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK:
+    toMsgPackBlob(Blob);
+    break;
+  default:
+    // Unknown note type: hand back an empty blob rather than whatever the
+    // caller's string happened to contain. The code this replaces cleared
+    // Blob before checking Type, so this keeps that behaviour.
+    Blob.clear();
+    break;
+  }
+}
+
+// Write the registers map into Blob as consecutive 8-byte records: a 32-bit
+// register number followed by its 32-bit value, copied out through a union in
+// the host's in-memory representation. Blob is left empty if there are no
+// registers.
+void AMDGPUPALMetadata::toLegacyBlob(std::string &Blob) {
   Blob.clear();
-  if (Type != ELF::NT_AMD_AMDGPU_PAL_METADATA)
-    return;
-  if (Registers.empty())
+  // Named Regs (not Registers) so the local does not shadow the member that
+  // getRegisters() caches into.
+  auto Regs = getRegisters();
+  if (Regs.getMap().empty())
     return;
-  for (auto I : Registers) {
+  for (auto I : Regs.getMap()) {
     union {
       char AsBytes[8];
       uint32_t AsInts[2];
     } U;
-    U.AsInts[0] = I.first;
-    U.AsInts[1] = I.second;
+    U.AsInts[0] = I.first.getUInt();
+    U.AsInts[1] = I.second.getUInt();
     Blob.insert(Blob.size(), &U.AsBytes[0], 8);
   }
 }
 
+// Replace the contents of Blob with the MsgPack encoding of the whole
+// metadata document.
+void AMDGPUPALMetadata::toMsgPackBlob(std::string &Blob) {
+  Blob.clear();
+  MsgPackDoc.writeToBlob(Blob);
+}
+
+// Set PAL metadata from YAML text and switch the blob type to MsgPack.
+// Returns false if the YAML could not be read or if any register key could
+// not be turned into a number; in the latter case the remaining registers are
+// still processed and only the offending entries are dropped.
+bool AMDGPUPALMetadata::setFromString(StringRef S) {
+  BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA_MSGPACK;
+  if (!MsgPackDoc.fromYAML(S))
+    return false;
+
+  // In the registers map, some keys may be of the form "0xa191
+  // (SPI_PS_INPUT_CNTL_0)", in which case the YAML input code made it a
+  // string. We need to turn it into a number.
+  auto &RegsObj = refRegisters();
+  auto OrigRegs = RegsObj;
+  RegsObj = MsgPackDoc.getMapNode();
+  Registers = RegsObj.getMap();
+  bool Ok = true;
+  for (auto I : OrigRegs.getMap()) {
+    auto Key = I.first;
+    if (Key.getKind() == msgpack::Type::String) {
+      // consumeInteger() advances the StringRef it is called on, and may
+      // already have stripped a radix prefix when it reports failure. Parse a
+      // scratch copy so the diagnostic can show the key exactly as written.
+      const StringRef KeyStr = Key.getString();
+      StringRef Rest = KeyStr;
+      uint64_t Val;
+      if (Rest.consumeInteger(0, Val)) {
+        Ok = false;
+        errs() << "Unrecognized PAL metadata register key '" << KeyStr << "'\n";
+        continue;
+      }
+      Key = MsgPackDoc.getNode(uint64_t(Val));
+    }
+    Registers.getMap()[Key] = I.second;
+  }
+  return Ok;
+}
+
+// Reference (create if necessary) the node for the registers map.
+msgpack::DocNode &AMDGPUPALMetadata::refRegisters() {
+  // Walk root -> "amdpal.pipelines" -> element 0 -> ".registers", passing
+  // Convert=true at every level.
+  auto &Pipelines = MsgPackDoc.getRoot().getMap(
+      /*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")];
+  auto &RegsNode =
+      Pipelines.getArray(/*Convert=*/true)[0]
+          .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".registers")];
+  // The registers node itself is also converted to a map before it is
+  // returned.
+  RegsNode.getMap(/*Convert=*/true);
+  return RegsNode;
+}
+
+// Return the registers map, looking it up through refRegisters() and caching
+// it in Registers the first time it is needed.
+msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
+  if (!Registers.isEmpty())
+    return Registers.getMap();
+  Registers = refRegisters();
+  return Registers.getMap();
+}
+
+// Map a calling convention to the key used for its hardware shader stage in
+// the PAL metadata. Anything that is not one of the graphics conventions
+// listed below maps to ".cs".
+static const char *getStageName(CallingConv::ID CC) {
+  const char *Name = ".cs";
+  switch (CC) {
+  case CallingConv::AMDGPU_PS:
+    Name = ".ps";
+    break;
+  case CallingConv::AMDGPU_VS:
+    Name = ".vs";
+    break;
+  case CallingConv::AMDGPU_GS:
+    Name = ".gs";
+    break;
+  case CallingConv::AMDGPU_ES:
+    Name = ".es";
+    break;
+  case CallingConv::AMDGPU_HS:
+    Name = ".hs";
+    break;
+  case CallingConv::AMDGPU_LS:
+    Name = ".ls";
+    break;
+  default:
+    break;
+  }
+  return Name;
+}
+
+// Get (create if necessary) the .hardware_stages entry for the given calling
+// convention.
+msgpack::MapDocNode AMDGPUPALMetadata::getHwStage(unsigned CC) {
+  // On first use, locate the ".hardware_stages" map of pipeline 0 (passing
+  // Convert=true at every level) and cache it in HwStages.
+  if (HwStages.isEmpty()) {
+    auto &Pipelines =
+        MsgPackDoc.getRoot().getMap(/*Convert=*/true)["amdpal.pipelines"];
+    auto &StagesNode = Pipelines.getArray(/*Convert=*/true)[0]
+                           .getMap(/*Convert=*/true)[".hardware_stages"];
+    HwStages = StagesNode.getMap(/*Convert=*/true);
+  }
+  // The per-stage entry is keyed by the stage name for this calling
+  // convention.
+  auto &Stage = HwStages.getMap()[getStageName(CC)];
+  return Stage.getMap(/*Convert=*/true);
+}
Index: test/CodeGen/AMDGPU/amdpal-msgpack-cs.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-cs.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata +; GCN-LABEL: {{^}}cs_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: 0x2e12 (COMPUTE_PGM_RSRC1) +define amdgpu_cs half @cs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Force MsgPack format metadata +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/CodeGen/AMDGPU/amdpal-msgpack-es.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-es.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; amdpal evaluation shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata +; GCN-LABEL: {{^}}es_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: 0x2cca (SPI_SHADER_PGM_RSRC1_ES) +define amdgpu_es half @es_amdpal(half %arg0) { + 
%add = fadd half %arg0, 1.0 + ret half %add +} + +; Force MsgPack format metadata +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/CodeGen/AMDGPU/amdpal-msgpack-gs.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-gs.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata +; GCN-LABEL: {{^}}gs_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) +define amdgpu_gs half @gs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Force MsgPack format metadata +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/CodeGen/AMDGPU/amdpal-msgpack-hs.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-hs.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata +; GCN-LABEL: {{^}}hs_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) +define amdgpu_hs half @hs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Force MsgPack format metadata 
+!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/CodeGen/AMDGPU/amdpal-msgpack-ls.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-ls.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; amdpal load shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata +; GCN-LABEL: {{^}}ls_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) +define amdgpu_ls half @ls_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Force MsgPack format metadata +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/CodeGen/AMDGPU/amdpal-msgpack-ps.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-ps.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal +; metadata. Check for 0x2c0b (SPI_SHADER_PGM_RSRC2_PS) in pal metadata, and +; it has a value starting 0x42 as it is set to 0x42000000 in the metadata +; below. Also check that .internal_pipeline_hash is propagated. 
+; GCN-LABEL: {{^}}ps_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: .internal_pipeline_hash: +; GCN-NEXT: - 0x123456789abcdef0 +; GCN-NEXT: - 0xfedcba9876543210 +; GCN: .registers: +; GCN: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): +; GCN: 0x2c0b (SPI_SHADER_PGM_RSRC2_PS): 0x42 +define amdgpu_ps half @ps_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; amdgpu.pal.metadata.msgpack represents this: +; +; .amdgpu_pal_metadata +; --- +; amdpal.pipelines: +; - .internal_pipeline_hash: +; - 0x123456789abcdef0 +; - 0xfedcba9876543210 +; .registers: +; 0x2c0b (SPI_SHADER_PGM_RSRC2_PS): 0x42000000 +; ... +; .end_amdgpu_pal_metadata + +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!"\81\b0\61\6d\64\70\61\6c\2e\70\69\70\65\6c\69\6e\65\73\91\82\b7\2e\69\6e\74\65\72\6e\61\6c\5f\70\69\70\65\6c\69\6e\65\5f\68\61\73\68\92\cf\12\34\56\78\9a\bc\de\f0\cf\fe\dc\ba\98\76\54\32\10\aa\2e\72\65\67\69\73\74\65\72\73\81\cd\2c\0b\ce\42\00\00\00"}; Index: test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; This pixel shader does not use the result of its interpolation, so it would +; end up with an interpolation mode set in PSAddr but not PSEnable. This test tests +; the workaround that ensures that an interpolation mode is also set in PSEnable. 
+; GCN-LABEL: {{^}}amdpal_psenable: +; GCN: .amdgpu_pal_metadata +; GCN: 0xa1b3 (SPI_PS_INPUT_ENA): 0x2 +; GCN: 0xa1b4 (SPI_PS_INPUT_ADDR): 0x2 +define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) #6 { + %inst23 = extractelement <2 x float> %pos, i32 0 + %inst24 = extractelement <2 x float> %pos, i32 1 + %inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0) + %inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0) + ret void +} + +declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2 +declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2 + +attributes #6 = { nounwind "InitialPSInputAddr"="2" } + +; Force MsgPack format metadata +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/CodeGen/AMDGPU/amdpal-msgpack-vs.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdpal-msgpack-vs.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata +; GCN-LABEL: {{^}}vs_amdpal: +; GCN: .amdgpu_pal_metadata +; GCN: 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) +define amdgpu_vs half @vs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Force MsgPack format metadata +!amdgpu.pal.metadata.msgpack = !{!0} +!0 = !{!""} Index: test/MC/AMDGPU/pal-msgpack.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/pal-msgpack.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc -triple amdgcn--amdpal 
-mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF + + .amdgpu_pal_metadata +--- +amdpal.pipelines: + - .hardware_stages: + .ps: + .entry_point: ps_amdpal + .scratch_memory_size: 0 + .sgpr_count: 0x1 + .vgpr_count: 0x1 + .internal_pipeline_hash: + - 0x123456789abcdef0 + - 0xfedcba9876543210 + .registers: + 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0 + 0x2c0b (SPI_SHADER_PGM_RSRC2_PS): 0x42000000 + 0xa1b3 (SPI_PS_INPUT_ENA): 0x1 + 0xa1b4 (SPI_PS_INPUT_ADDR): 0x1 +... + .end_amdgpu_pal_metadata + +// ASM: .amdgpu_pal_metadata +// ASM: --- +// ASM: amdpal.pipelines: +// ASM: - .hardware_stages: +// ASM: .ps: +// ASM: .entry_point: ps_amdpal +// ASM: .scratch_memory_size: 0 +// ASM: .sgpr_count: 0x1 +// ASM: .vgpr_count: 0x1 +// ASM: .internal_pipeline_hash: +// ASM: - 0x123456789abcdef0 +// ASM: - 0xfedcba9876543210 +// ASM: .registers: +// ASM: 0x2c0a (SPI_SHADER_PGM_RSRC1_PS): 0 +// ASM: 0x2c0b (SPI_SHADER_PGM_RSRC2_PS): 0x42000000 +// ASM: 0xa1b3 (SPI_PS_INPUT_ENA): 0x1 +// ASM: 0xa1b4 (SPI_PS_INPUT_ADDR): 0x1 +// ASM: ... 
+// ASM: .end_amdgpu_pal_metadata + +// ELF: SHT_NOTE +// ELF: 0000: 04000000 BD000000 0D000000 414D4400 |............AMD.| +// ELF: 0010: 81B0616D 6470616C 2E706970 656C696E |..amdpal.pipelin| +// ELF: 0020: 65739183 B02E6861 72647761 72655F73 |es....hardware_s| +// ELF: 0030: 74616765 7381A32E 707384AC 2E656E74 |tages...ps...ent| +// ELF: 0040: 72795F70 6F696E74 A970735F 616D6470 |ry_point.ps_amdp| +// ELF: 0050: 616CB42E 73637261 7463685F 6D656D6F |al..scratch_memo| +// ELF: 0060: 72795F73 697A6500 AB2E7367 70725F63 |ry_size...sgpr_c| +// ELF: 0070: 6F756E74 01AB2E76 6770725F 636F756E |ount...vgpr_coun| +// ELF: 0080: 7401B72E 696E7465 726E616C 5F706970 |t...internal_pip| +// ELF: 0090: 656C696E 655F6861 736892CF 12345678 |eline_hash...4Vx| +// ELF: 00A0: 9ABCDEF0 CFFEDCBA 98765432 10AA2E72 |.........vT2...r| +// ELF: 00B0: 65676973 74657273 84CD2C0A 00CD2C0B |egisters..,...,.| +// ELF: 00C0: CE420000 00CDA1B3 01CDA1B4 01000000 |.B..............|