Index: docs/AMDGPUUsage.rst =================================================================== --- docs/AMDGPUUsage.rst +++ docs/AMDGPUUsage.rst @@ -808,15 +808,17 @@ .. table:: AMDGPU ELF Symbols :name: amdgpu-elf-symbols-table - ===================== ============== ============= ================== - Name Type Section Description - ===================== ============== ============= ================== - *link-name* ``STT_OBJECT`` - ``.data`` Global variable - - ``.rodata`` - - ``.bss`` - *link-name*\ ``.kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor - *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point - ===================== ============== ============= ================== + ===================== ================== ============= ================== + Name Type Section Description + ===================== ================== ============= ================== + *link-name* ``STT_OBJECT`` - ``.data`` Global variable + - ``.rodata`` + - ``.bss`` + *link-name*\ ``.kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor + *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point + *link-name* ``STT_AMDGPU_LDS`` none Global variable in + LDS + ===================== ================== ============= ================== Global variable Global variables both used and defined by the compilation unit. @@ -828,10 +830,18 @@ will resolve relocations using the definition provided by another code object or explicitly defined by the runtime. - All global symbols, whether defined in the compilation unit or external, are - accessed by the machine code indirectly through a GOT table entry. This - allows them to be preemptable. The GOT table is only supported when the target - triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`). + If the symbol resides in local/group memory (LDS) then its section is + ``STN_UNDEF``, its type is ``STT_AMDGPU_LDS``, and the top 5 bits of the + ``st_other`` field are the base-2 logarithm of the symbol's alignment + requirement. 
An alignment requirement equal to or larger than the maximum + allocation size of LDS memory indicates that the symbol value will always + be 0. At most one such symbol can be reachable from a kernel entry point. + + All global symbols, whether defined in the compilation unit or external, + except those residing in LDS, are accessed by the machine code indirectly + through a GOT table entry. This allows them to be preemptable. The GOT table + is only supported when the target triple OS is ``amdhsa`` + (see :ref:`amdgpu-target-triples`). .. TODO Add description of linked shared object symbols. Seems undefined symbols Index: include/llvm/BinaryFormat/ELF.h =================================================================== --- include/llvm/BinaryFormat/ELF.h +++ include/llvm/BinaryFormat/ELF.h @@ -1055,7 +1055,8 @@ STT_HIPROC = 15, // Highest processor-specific symbol type // AMDGPU symbol types - STT_AMDGPU_HSA_KERNEL = 10 + STT_AMDGPU_HSA_KERNEL = 10, + STT_AMDGPU_LDS = 13, // Symbol is a data object in local memory (LDS) }; enum { Index: lib/MC/MCSymbolELF.cpp =================================================================== --- lib/MC/MCSymbolELF.cpp +++ lib/MC/MCSymbolELF.cpp @@ -23,18 +23,18 @@ // Shift value for STV_* flags. 4 possible values, 2 bits. ELF_STV_Shift = 5, - // Shift value for STO_* flags. 3 bits. All the values are between 0x20 and - // 0xe0, so we shift right by 5 before storing. + // Shift value for STO_* flags. 5 bits. All the values are between 0x08 and + // 0xf8, so we shift right by 3 before storing. ELF_STO_Shift = 7, // One bit. - ELF_IsSignature_Shift = 10, + ELF_IsSignature_Shift = 12, // One bit. - ELF_WeakrefUsedInReloc_Shift = 11, + ELF_WeakrefUsedInReloc_Shift = 13, // One bit.
- ELF_BindingSet_Shift = 12 + ELF_BindingSet_Shift = 14 }; } @@ -119,6 +119,9 @@ case ELF::STT_GNU_IFUNC: Val = 6; break; + case ELF::STT_AMDGPU_LDS: + Val = 7; + break; } uint32_t OtherFlags = getFlags() & ~(0x7 << ELF_STT_Shift); setFlags(OtherFlags | (Val << ELF_STT_Shift)); @@ -143,6 +146,8 @@ return ELF::STT_TLS; case 6: return ELF::STT_GNU_IFUNC; + case 7: + return ELF::STT_AMDGPU_LDS; } } @@ -160,16 +165,16 @@ } void MCSymbolELF::setOther(unsigned Other) { - assert((Other & 0x1f) == 0); - Other >>= 5; - assert(Other <= 0x7); - uint32_t OtherFlags = getFlags() & ~(0x7 << ELF_STO_Shift); + assert((Other & 0x7) == 0); + Other >>= 3; + assert(Other <= 0x1f); + uint32_t OtherFlags = getFlags() & ~(0x1f << ELF_STO_Shift); setFlags(OtherFlags | (Other << ELF_STO_Shift)); } unsigned MCSymbolELF::getOther() const { - unsigned Other = (Flags >> ELF_STO_Shift) & 7; - return Other << 5; + unsigned Other = (Flags >> ELF_STO_Shift) & 0x1f; + return Other << 3; } void MCSymbolELF::setIsWeakrefUsedInReloc() const { @@ -198,4 +203,5 @@ bool MCSymbolELF::isBindingSet() const { return getFlags() & (0x1 << ELF_BindingSet_Shift); } + } Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -895,6 +895,7 @@ bool ParseDirectiveHSAMetadata(); bool ParseDirectivePALMetadataBegin(); bool ParseDirectivePALMetadata(); + bool ParseDirectiveAMDGPULDS(); /// Common code to parse out a block of text (typically YAML) between start and /// end directives. 
@@ -3544,6 +3545,33 @@ return false; } +/// ParseDirectiveAMDGPULDS +/// ::= .amdgpu_lds identifier [',' alignment] +bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { + StringRef Name; + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); + + int64_t Align = 4; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + + SMLoc StartLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Align)) + return true; + + if (Align < 0 || !isPowerOf2_64(Align)) + return Error(StartLoc, "alignment must be a power of two"); + if (Align >= 1u << 31) + return Error(StartLoc, "alignment is too large"); + } + + getTargetStreamer().emitAMDGPULDS(Symbol, Align); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -3577,6 +3605,9 @@ return ParseDirectiveHSAMetadata(); } + if (IDVal == ".amdgpu_lds") + return ParseDirectiveAMDGPULDS(); + if (IDVal == PALMD::AssemblerDirectiveBegin) return ParseDirectivePALMetadataBegin(); Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -53,6 +53,8 @@ virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; + virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Align) = 0; + /// \returns True on success, false on failure. virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; @@ -104,6 +106,8 @@ void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; + void emitAMDGPULDS(MCSymbol *Sym, unsigned Align) override; + /// \returns True on success, false on failure. 
bool EmitISAVersion(StringRef IsaVersionString) override; @@ -146,6 +150,8 @@ void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; + void emitAMDGPULDS(MCSymbol *Sym, unsigned Align) override; + /// \returns True on success, false on failure. bool EmitISAVersion(StringRef IsaVersionString) override; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -202,6 +202,10 @@ } } +void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Align) { + OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Align << "\n"; +} + bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; return true; @@ -482,6 +486,14 @@ Symbol->setType(Type); } +void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Align) { + assert(isPowerOf2_32(Align)); + + MCSymbolELF *SymbolELF = cast(Symbol); + SymbolELF->setType(ELF::STT_AMDGPU_LDS); + SymbolELF->setOther(Log2_32(Align) << 3); +} + bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. 
Index: test/MC/AMDGPU/elf-lds-error.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/elf-lds-error.s @@ -0,0 +1,10 @@ +// RUN: not llvm-mc -triple amdgcn-- -mcpu gfx900 %s -o - 2>&1 | FileCheck %s + +// CHECK: :[[@LINE+1]]:33: error: alignment must be a power of two + .amdgpu_lds zero_align, 0 + +// CHECK: :[[@LINE+1]]:36: error: alignment must be a power of two + .amdgpu_lds non_pot_align, 12 + +// CHECK: :[[@LINE+1]]:33: error: alignment is too large + .amdgpu_lds huge_align, 1099511627776 Index: test/MC/AMDGPU/elf-lds.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/elf-lds.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu gfx900 %s -o - | llvm-readobj -t -r | FileCheck %s + + .text + .globl test_kernel + .p2align 8 + .type test_kernel,@function +test_kernel: + s_mov_b32 s0, lds0@abs32@lo + v_lshl_add_u32 v0, v0, 2, s0 + ds_read2_b32 v[1:2], v0 offset1:1 + s_endpgm +.Lfunc_end: + .size test_kernel, .Lfunc_end-test_kernel + + .globl lds0 + .size lds0, 192 + .amdgpu_lds lds0, 16 + +// CHECK: Relocations [ +// CHECK: Section (3) .rel.text { +// CHECK-NEXT: 0x4 R_AMDGPU_ABS32 lds0 0x0 +// CHECK-NEXT: } +// CHECK: ] + +// CHECK: Symbol { +// CHECK: Name: lds0 (39) +// CHECK-NEXT: Value: 0x0 +// CHECK-NEXT: Size: 192 +// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Type: AMDGPU_LDS (0xD) +// CHECK-NEXT: Align: 16 +// CHECK-NEXT: Other: 32 +// CHECK-NEXT: Section: Undefined (0x0) +// CHECK-NEXT: } Index: tools/llvm-readobj/ELFDumper.cpp =================================================================== --- tools/llvm-readobj/ELFDumper.cpp +++ tools/llvm-readobj/ELFDumper.cpp @@ -1020,7 +1020,8 @@ {"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}}; static const EnumEntry AMDGPUSymbolTypes[] = { - { "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL } + { "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL }, + { "AMDGPU_LDS", ELF::STT_AMDGPU_LDS 
}, }; static const char *getGroupType(uint32_t Flag) { @@ -4396,15 +4397,22 @@ W.printNumber("Size", Symbol->st_size); W.printEnum("Binding", Symbol->getBinding(), makeArrayRef(ElfSymbolBindings)); if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && - SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS) + SymbolType >= ELF::STT_LOOS && SymbolType <= ELF::STT_HIPROC) W.printEnum("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes)); else W.printEnum("Type", SymbolType, makeArrayRef(ElfSymbolTypes)); - if (Symbol->st_other == 0) + + unsigned char st_other = Symbol->st_other; + if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU && + SymbolType == ELF::STT_AMDGPU_LDS) { + W.printNumber("Align", 1u << (st_other >> 3u)); + st_other &= 0x7u; + } + if (st_other == 0) { // Usually st_other flag is zero. Do not pollute the output // by flags enumeration in that case. - W.printNumber("Other", 0); - else { + W.printNumber("Other", Symbol->st_other); + } else { std::vector> SymOtherFlags(std::begin(ElfSymOtherFlags), std::end(ElfSymOtherFlags)); if (Obj->getHeader()->e_machine == EM_MIPS) {