Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -112,10 +112,12 @@ SIProgramInfo CurrentProgramInfo; DenseMap CallGraphResourceInfo; + std::map PalMetadata; uint64_t getFunctionCodeSize(const MachineFunction &MF) const; SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const; + void readPalMetadata(Module &M); void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; @@ -127,6 +129,7 @@ /// can correctly setup the GPU state. void EmitProgramInfoR600(const MachineFunction &MF); void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo); + void EmitPalMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo); void emitCommonFunctionComments(uint32_t NumVGPR, uint32_t NumSGPR, uint32_t ScratchSize, Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -110,12 +110,18 @@ } void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) - return; - AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + readPalMetadata(M); + // AMDPAL wants an HSA_ISA .note. 
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA( + ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + } + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1); getTargetStreamer().EmitDirectiveHSACodeObjectISA( ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); @@ -123,6 +129,17 @@ } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + // Copy the PAL metadata from the map where we collected it into a vector, + // then write it as a .note. + std::vector Data; + for (auto i : PalMetadata) { + Data.push_back(i.first); + Data.push_back(i.second); + } + getTargetStreamer().EmitPalMetadata(Data); + } + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; @@ -190,6 +207,27 @@ return AsmPrinter::doFinalization(M); } +// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the +// frontend into our PalMetadata map, ready for per-function modification. It +// is a NamedMD containing an MDTuple containing a number of MDNodes each of +// which is an integer value, and each two integer values forms a key=value +// pair that we store as PalMetadata[key]=value in the map. +void AMDGPUAsmPrinter::readPalMetadata(Module &M) { + auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); + if (!NamedMD || !NamedMD->getNumOperands()) + return; + auto Tuple = dyn_cast(NamedMD->getOperand(0)); + if (!Tuple) + return; + for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) { + auto Key = mdconst::dyn_extract(Tuple->getOperand(I)); + auto Val = mdconst::dyn_extract(Tuple->getOperand(I + 1)); + if (!Key || !Val) + continue; + PalMetadata[Key->getZExtValue()] = Val->getZExtValue(); + } +} + // Print comments that apply to both callable functions and entry points. 
void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, @@ -232,6 +270,8 @@ Info = analyzeResourceUsage(MF); } + if (STM.isAmdPalOS()) + EmitPalMetadata(MF, CurrentProgramInfo); if (!STM.isAmdHsaOS()) { EmitProgramInfoSI(MF, CurrentProgramInfo); } @@ -923,6 +963,74 @@ OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4); } +// This is the equivalent of EmitProgramInfoSI above, but for when the OS type +// is AMDPAL. It stores each compute/SPI register setting and other PAL +// metadata items into the PalMetadata map, combining with any provided by the +// frontend as LLVM metadata. Once all functions are written, PalMetadata is +// then written as a single block in the .note section. +void AMDGPUAsmPrinter::EmitPalMetadata(const MachineFunction &MF, + const SIProgramInfo &CurrentProgramInfo) { + const SIMachineFunctionInfo *MFI = MF.getInfo(); + // Given the calling convention, calculate the register number for rsrc1. In + // principle the register number could change in future hardware, but we know + // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so + // we can use the same fixed value that .AMDGPU.config has for Mesa. Note + // that we use a register number rather than a byte offset, so we need to + // divide by 4. + unsigned Rsrc1Reg = getRsrcReg(MF.getFunction()->getCallingConv()) / 4; + unsigned Rsrc2Reg = Rsrc1Reg + 1; + // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used + // with a constant offset to access any non-register shader-specific PAL + // metadata key. 
+ unsigned ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE; + switch (MF.getFunction()->getCallingConv()) { + case CallingConv::AMDGPU_PS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_VS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_GS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_ES: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_HS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_LS: + ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE; + break; + } + unsigned NumUsedVgprsKey = ScratchSizeKey + + AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS + - AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE; + unsigned NumUsedSgprsKey = ScratchSizeKey + + AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS + - AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE; + PalMetadata[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU; + PalMetadata[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU; + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + PalMetadata[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1; + PalMetadata[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2; + // ScratchSize is in bytes, 16 aligned. + PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16); + } else { + PalMetadata[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) + | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks); + if (CurrentProgramInfo.ScratchBlocks > 0) + PalMetadata[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1); + // ScratchSize is in bytes, 16 aligned. 
+ PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16); + } + if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + PalMetadata[Rsrc2Reg] |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); + PalMetadata[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable(); + PalMetadata[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr(); + } +} + // This is supposed to be log2(Size) static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { switch (Size) { Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h @@ -27,16 +27,49 @@ // TODO: Move this enum to include/llvm/Support so it can be used in tools? enum NoteType{ + NT_AMDGPU_HSA_RESERVED_0 = 0, NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1, NT_AMDGPU_HSA_HSAIL = 2, NT_AMDGPU_HSA_ISA = 3, NT_AMDGPU_HSA_PRODUCER = 4, NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5, NT_AMDGPU_HSA_EXTENSION = 6, + NT_AMDGPU_HSA_RESERVED_7 = 7, + NT_AMDGPU_HSA_RESERVED_8 = 8, + NT_AMDGPU_HSA_RESERVED_9 = 9, NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10, + NT_AMD_AMDGPU_ISA = 11, + NT_AMDGPU_PAL_METADATA = 12, NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101, NT_AMDGPU_HSA_HLDEBUG_TARGET = 102 }; + +enum NoteAmdGpuPalMetadataKey { + AMDGPU_PAL_METADATA_LS_NUM_USED_VGPRS = 0x10000015, + AMDGPU_PAL_METADATA_HS_NUM_USED_VGPRS = 0x10000016, + AMDGPU_PAL_METADATA_ES_NUM_USED_VGPRS = 0x10000017, + AMDGPU_PAL_METADATA_GS_NUM_USED_VGPRS = 0x10000018, + AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS = 0x10000019, + AMDGPU_PAL_METADATA_PS_NUM_USED_VGPRS = 0x1000001a, + AMDGPU_PAL_METADATA_CS_NUM_USED_VGPRS = 0x1000001b, + + AMDGPU_PAL_METADATA_LS_NUM_USED_SGPRS = 0x1000001c, + AMDGPU_PAL_METADATA_HS_NUM_USED_SGPRS = 0x1000001d, + AMDGPU_PAL_METADATA_ES_NUM_USED_SGPRS = 0x1000001e, + AMDGPU_PAL_METADATA_GS_NUM_USED_SGPRS = 0x1000001f, + AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS = 
0x10000020, + AMDGPU_PAL_METADATA_PS_NUM_USED_SGPRS = 0x10000021, + AMDGPU_PAL_METADATA_CS_NUM_USED_SGPRS = 0x10000022, + + AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE = 0x10000038, + AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE = 0x10000039, + AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE = 0x1000003a, + AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE = 0x1000003b, + AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE = 0x1000003c, + AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE = 0x1000003d, + AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE = 0x1000003e, +}; + } } Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -833,6 +833,7 @@ bool ParseDirectiveAMDKernelCodeT(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); + bool ParseDirectivePalMetadata(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -2493,6 +2494,21 @@ return false; } +bool AMDGPUAsmParser::ParseDirectivePalMetadata() { + std::vector Data; + for (;;) { + uint32_t Value; + if (ParseAsAbsoluteExpression(Value)) + return TokError("invalid value in .amdgpu_pal_metadata"); + Data.push_back(Value); + if (getLexer().isNot(AsmToken::Comma)) + break; + Lex(); + } + getTargetStreamer().EmitPalMetadata(Data); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2511,6 +2527,9 @@ if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); + if (IDVal == ".amdgpu_pal_metadata") + return ParseDirectivePalMetadata(); + return true; } Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ 
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -53,6 +53,8 @@ /// \returns True on success, false on failure. virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0; + + virtual bool EmitPalMetadata(ArrayRef Data) = 0; }; class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { @@ -72,6 +74,8 @@ /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; + + bool EmitPalMetadata(ArrayRef data) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -99,6 +103,8 @@ /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; + + bool EmitPalMetadata(ArrayRef data) override; }; } Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -112,6 +112,14 @@ return true; } +bool AMDGPUTargetAsmStreamer::EmitPalMetadata(ArrayRef Data) { + OS << "\t.amdgpu_pal_metadata"; + for (auto I = Data.begin(), E = Data.end(); I != E; ++I) + OS << (I == Data.begin() ? 
" 0x" : ",0x") << Twine::utohexstr(*I); + OS << "\n"; + return true; +} + //===----------------------------------------------------------------------===// // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// @@ -230,3 +238,16 @@ return true; } + +bool AMDGPUTargetELFStreamer::EmitPalMetadata(ArrayRef Data) { + EmitAMDGPUNote( + MCConstantExpr::create(Data.size() * sizeof(uint32_t), getContext()), + ElfNote::NT_AMDGPU_PAL_METADATA, + [&](MCELFStreamer &OS){ + for (auto I : Data) + OS.EmitIntValue(I, sizeof(uint32_t)); + } + ); + return true; +} + Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-cs.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-cs.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-cs.ll @@ -2,10 +2,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s -; amdpal compute shader: check for 47176 (COMPUTE_PGM_RSRC1) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 47176 +; amdpal compute shader: check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata ; GCN-LABEL: {{^}}cs_amdpal: +; GCN: .amdgpu_pal_metadata{{.*}}0x2e12, define amdgpu_cs half @cs_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-es.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-es.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-es.ll @@ -1,10 +1,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; amdpal pixel shader: check for 
45864 (SPI_SHADER_PGM_RSRC1_ES) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 45864 +; amdpal export shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata ; GCN-LABEL: {{^}}es_amdpal: +; GCN: .amdgpu_pal_metadata{{.*}}0x2cca, define amdgpu_es half @es_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-gs.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-gs.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-gs.ll @@ -2,10 +2,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s -; amdpal pixel shader: check for 45608 (SPI_SHADER_PGM_RSRC1_GS) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 45608 +; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata ; GCN-LABEL: {{^}}gs_amdpal: +; GCN: .amdgpu_pal_metadata{{.*}}0x2c8a, define amdgpu_gs half @gs_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-hs.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-hs.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-hs.ll @@ -2,10 +2,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s -; amdpal pixel shader: check for 46120 (SPI_SHADER_PGM_RSRC1_HS) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 46120 +; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata ; GCN-LABEL: {{^}}hs_amdpal: +; GCN: 
.amdgpu_pal_metadata{{.*}}0x2d0a, define amdgpu_hs half @hs_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-ls.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-ls.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-ls.ll @@ -1,10 +1,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; amdpal pixel shader: check for 46376 (SPI_SHADER_PGM_RSRC1_LS) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 46376 +; amdpal local shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata ; GCN-LABEL: {{^}}ls_amdpal: +; GCN: .amdgpu_pal_metadata{{.*}}0x2d4a, define amdgpu_ls half @ls_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-ps.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-ps.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-ps.ll @@ -2,13 +2,16 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s -; amdpal pixel shader: check for 45096 (SPI_SHADER_PGM_RSRC1_PS) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 45096 +; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal +; metadata. Check for 0x2c0b (SPI_SHADER_PGM_RSRC2_PS) in pal metadata, and +; it has a value starting 0x42 as it is set to 0x42000000 in the metadata +; below. Also check that key 0x10000000 value 0x12345678 is propagated. 
; GCN-LABEL: {{^}}ps_amdpal: +; GCN: .amdgpu_pal_metadata{{.*0x2c0a,[^,]*,0x2c0b,0x42.*,0x10000000,0x12345678}} define amdgpu_ps half @ps_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add } - +!amdgpu.pal.metadata = !{!0} +!0 = !{i32 11275, i32 1107296256, i32 268435456, i32 305419896} Index: llvm/trunk/test/CodeGen/AMDGPU/amdpal-vs.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdpal-vs.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdpal-vs.ll @@ -2,10 +2,9 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s -; amdpal pixel shader: check for 45352 (SPI_SHADER_PGM_RSRC1_VS) in .AMDGPU.config -; GCN-LABEL: .AMDGPU.config -; GCN: .long 45352 +; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata ; GCN-LABEL: {{^}}vs_amdpal: +; GCN: .amdgpu_pal_metadata{{.*}}0x2c4a, define amdgpu_vs half @vs_amdpal(half %arg0) { %add = fadd half %arg0, 1.0 ret half %add Index: llvm/trunk/test/MC/AMDGPU/pal.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/pal.s +++ llvm/trunk/test/MC/AMDGPU/pal.s @@ -0,0 +1,9 @@ +// RUN: llvm-mc -triple amdgcn--amdpal -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF + +.amdgpu_pal_metadata 0x12345678, 0xfedcba98, 0x2468ace0, 0xfdb97531 +// ASM: .amdgpu_pal_metadata 0x12345678,0xfedcba98,0x2468ace0,0xfdb97531 +// ELF: SHT_NOTE +// ELF: 0000: 04000000 10000000 0C000000 414D4400 +// ELF: 0010: 78563412 98BADCFE E0AC6824 3175B9FD +