Index: lld/trunk/ELF/Driver.cpp =================================================================== --- lld/trunk/ELF/Driver.cpp +++ lld/trunk/ELF/Driver.cpp @@ -253,6 +253,9 @@ if (Config->GnuHash && Config->EMachine == EM_MIPS) error("The .gnu.hash section is not compatible with the MIPS target."); + + if (!Config->Entry.empty() && Config->EMachine == EM_AMDGPU) + error("-e option is not valid for AMDGPU."); } template void LinkerDriver::link(opt::InputArgList &Args) { @@ -261,7 +264,10 @@ if (!Config->Shared) { // Add entry symbol. - if (Config->Entry.empty()) + // + // There is no entry symbol for AMDGPU binaries, so skip adding one to avoid + // having an undefined symbol. + if (Config->Entry.empty() && Config->EMachine != EM_AMDGPU) Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol Index: lld/trunk/ELF/OutputSections.cpp =================================================================== --- lld/trunk/ELF/OutputSections.cpp +++ lld/trunk/ELF/OutputSections.cpp @@ -788,6 +788,11 @@ InputSectionBase *SC = DR.Section; if (!SC) return DR.Sym.st_value; + + // Symbol offsets for AMDGPU need to be the offset in bytes of the symbol + // from the beginning of the section. + if (Config->EMachine == EM_AMDGPU) + return SC->getOffset(DR.Sym); if (DR.Sym.getType() == STT_TLS) return SC->OutSec->getVA() + SC->getOffset(DR.Sym) - Out::TlsPhdr->p_vaddr; @@ -1325,7 +1330,11 @@ continue; const OutputSectionBase *OutSec = Section->OutSec; ESym->st_shndx = OutSec->SectionIndex; - VA += OutSec->getVA() + Section->getOffset(Sym); + VA = Section->getOffset(Sym); + // Symbol offsets for AMDGPU need to be the offset in bytes of the + // symbol from the beginning of the section. 
+ if (Config->EMachine != EM_AMDGPU) + VA += OutSec->getVA(); } ESym->st_name = StrTabSec.addString(SymName); ESym->st_size = Sym.st_size; Index: lld/trunk/ELF/Target.cpp =================================================================== --- lld/trunk/ELF/Target.cpp +++ lld/trunk/ELF/Target.cpp @@ -191,6 +191,22 @@ uint8_t *PairedLoc = nullptr) const override; }; +class AMDGPUTargetInfo final : public TargetInfo { +public: + AMDGPUTargetInfo(); + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const override; +}; + template class MipsTargetInfo final : public TargetInfo { public: MipsTargetInfo(); @@ -216,6 +232,8 @@ return new X86TargetInfo(); case EM_AARCH64: return new AArch64TargetInfo(); + case EM_AMDGPU: + return new AMDGPUTargetInfo(); case EM_MIPS: switch (Config->EKind) { case ELF32LEKind: @@ -1315,6 +1333,38 @@ } } +AMDGPUTargetInfo::AMDGPUTargetInfo() {} + +void AMDGPUTargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const { + llvm_unreachable("not implemented"); +} + +void AMDGPUTargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const { + llvm_unreachable("not implemented"); +} + +void AMDGPUTargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + llvm_unreachable("not implemented"); +} + +bool AMDGPUTargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) 
const { + return false; +} + +bool AMDGPUTargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { + return false; +} + +void AMDGPUTargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, uint64_t ZA, + uint8_t *PairedLoc) const { + llvm_unreachable("not implemented"); +} + template MipsTargetInfo::MipsTargetInfo() { PageSize = 65536; GotHeaderEntriesNum = 2; Index: lld/trunk/ELF/Writer.cpp =================================================================== --- lld/trunk/ELF/Writer.cpp +++ lld/trunk/ELF/Writer.cpp @@ -975,6 +975,18 @@ return Ret; } +/// For AMDGPU we need to use custom segment kinds in order to specify which +/// address space data should be loaded into. +template +static uint32_t getAmdgpuPhdr(OutputSectionBase *Sec) { + uint32_t Flags = Sec->getFlags(); + if (Flags & SHF_AMDGPU_HSA_CODE) + return PT_AMDGPU_HSA_LOAD_CODE_AGENT; + if ((Flags & SHF_AMDGPU_HSA_GLOBAL) && !(Flags & SHF_AMDGPU_HSA_AGENT)) + return PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM; + return PT_LOAD; +} + template void Writer::updateRelro(Elf_Phdr *Cur, Elf_Phdr *GnuRelroPhdr, uintX_t VA) { @@ -1032,7 +1044,9 @@ if (PH->p_flags != Flags) { // Flags changed. Create a new PT_LOAD. PH = &Phdrs[++PhdrIdx]; - setPhdr(PH, PT_LOAD, Flags, FileOff, VA, 0, Target->getPageSize()); + uint32_t PTType = (Config->EMachine != EM_AMDGPU) ? 
(uint32_t)PT_LOAD + : getAmdgpuPhdr(Sec); + setPhdr(PH, PTType, Flags, FileOff, VA, 0, Target->getPageSize()); } if (Sec->getFlags() & SHF_TLS) { Index: lld/trunk/test/ELF/amdgpu-entry.s =================================================================== --- lld/trunk/test/ELF/amdgpu-entry.s +++ lld/trunk/test/ELF/amdgpu-entry.s @@ -0,0 +1,15 @@ +# REQUIRES: amdgpu +# RUN: llvm-mc -filetype=obj -triple=amdgcn--amdhsa -mcpu=kaveri %s -o %t.o +# RUN: not lld -e kernel0 -flavor gnu %t.o -o %t + +.hsa_code_object_version 1,0 +.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" + +.hsatext +.globl kernel0 +.align 256 +.amdgpu_hsa_kernel kernel0 +kernel0: + s_endpgm +.Lfunc_end0: + .size kernel0, .Lfunc_end0-kernel0 Index: lld/trunk/test/ELF/amdgpu-globals.s =================================================================== --- lld/trunk/test/ELF/amdgpu-globals.s +++ lld/trunk/test/ELF/amdgpu-globals.s @@ -0,0 +1,138 @@ +# RUN: llvm-mc -filetype=obj -triple=amdgcn--amdhsa -mcpu=kaveri %s -o %t.o +# RUN: lld -flavor gnu %t.o -o %t +# RUN: llvm-readobj -sections -symbols -program-headers %t | FileCheck %s + +# REQUIRES: amdgpu + + .amdgpu_hsa_module_global module_global_program + .size module_global_program, 4 + .hsadata_global_program +module_global_program: + .long 0 ; 0x0 + + .amdgpu_hsa_program_global program_global_program + .size program_global_program, 4 + .hsadata_global_program +program_global_program: + .long 0 ; 0x0 + + .amdgpu_hsa_module_global module_global_agent + .size module_global_agent, 4 + .hsadata_global_agent +module_global_agent: + .long 0 ; 0x0 + + .amdgpu_hsa_program_global program_global_agent + .size program_global_agent, 4 + .hsadata_global_agent +program_global_agent: + .long 0 ; 0x0 + + .amdgpu_hsa_module_global module_global_readonly + .size module_global_readonly, 4 + .hsatext +module_global_readonly: + .long 0 ; 0x0 + + .amdgpu_hsa_program_global program_global_readonly + .size program_global_readonly, 4 + .hsatext +program_global_readonly: + .long 0 ; 0x0 + +# CHECK: 
Section { +# CHECK: Name: .hsadata_global_program +# CHECK: Type: SHT_PROGBITS (0x1) +# CHECK: Flags [ (0x100003) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: Address: [[HSADATA_GLOBAL_PROGRAM_ADDR:[0-9xa-f]+]] +# CHECK: } + +# CHECK: Section { +# CHECK: Name: .hsadata_global_agent +# CHECK: Type: SHT_PROGBITS (0x1) +# CHECK: Flags [ (0x900003) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000) +# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: } + +# CHECK: Section { +# CHECK: Name: .hsatext +# CHECK: Type: SHT_PROGBITS +# CHECK: Flags [ (0xC00007) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000) +# CHECK: SHF_AMDGPU_HSA_CODE (0x400000) +# CHECK: SHF_EXECINSTR (0x4) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: Address: [[HSATEXT_ADDR:[0-9xa-f]+]] +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: module_global_agent +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Local +# CHECK: Section: .hsadata_global_agent +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: module_global_program +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Local +# CHECK: Section: .hsadata_global_program +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: module_global_readonly +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Local +# CHECK: Type: Object +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: program_global_agent +# CHECK: Value: 0x4 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: Object +# CHECK: Section: .hsadata_global_agent +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: program_global_program +# CHECK: Value: 0x4 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: Object +# CHECK: Section: .hsadata_global_program +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: program_global_readonly +# CHECK: Value: 0x4 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: 
Type: Object +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: ProgramHeader { +# CHECK: Type: PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM +# CHECK: VirtualAddress: [[HSADATA_GLOBAL_PROGRAM_ADDR]] +# CHECK: } + +# CHECK: ProgramHeader { +# CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT +# CHECK: VirtualAddress: [[HSATEXT_ADDR]] +# CHECK: } Index: lld/trunk/test/ELF/amdgpu-kernels.s =================================================================== --- lld/trunk/test/ELF/amdgpu-kernels.s +++ lld/trunk/test/ELF/amdgpu-kernels.s @@ -0,0 +1,62 @@ +# REQUIRES: amdgpu +# RUN: llvm-mc -filetype=obj -triple=amdgcn--amdhsa -mcpu=kaveri %s -o %t.o +# RUN: lld -flavor gnu %t.o -o %t +# RUN: llvm-readobj -sections -symbols -program-headers %t | FileCheck %s + +.hsa_code_object_version 1,0 +.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" + +.hsatext +.globl kernel0 +.align 256 +.amdgpu_hsa_kernel kernel0 +kernel0: + s_endpgm +.Lfunc_end0: + .size kernel0, .Lfunc_end0-kernel0 + +.globl kernel1 +.align 256 +.amdgpu_hsa_kernel kernel1 +kernel1: + s_endpgm + s_endpgm +.Lfunc_end1: + .size kernel1, .Lfunc_end1-kernel1 + + +# CHECK: Section { +# CHECK: Name: .hsatext +# CHECK: Type: SHT_PROGBITS +# CHECK: Flags [ (0xC00007) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000) +# CHECK: SHF_AMDGPU_HSA_CODE (0x400000) +# CHECK: SHF_EXECINSTR (0x4) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: Address: [[HSATEXT_ADDR:[0-9xa-f]+]] +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: kernel0 +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: AMDGPU_HSA_KERNEL +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: kernel1 +# CHECK: Value: 0x100 +# CHECK: Size: 8 +# CHECK: Binding: Global +# CHECK: Type: AMDGPU_HSA_KERNEL +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: ProgramHeader { +# CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT +# CHECK: VirtualAddress: [[HSATEXT_ADDR]] +# CHECK: }