Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -245,7 +245,10 @@ if (!Config->Shared) { // Add entry symbol. - if (Config->Entry.empty()) + // + // There is no entry symbol for AMDGPU binaries, so skip adding one to avoid + // having an undefined symbol. + if (Config->Entry.empty() && Config->EMachine != EM_AMDGPU) Config->Entry = (Config->EMachine == EM_MIPS) ? "__start" : "_start"; // In the assembly for 32 bit x86 the _GLOBAL_OFFSET_TABLE_ symbol Index: ELF/OutputSections.cpp =================================================================== --- ELF/OutputSections.cpp +++ ELF/OutputSections.cpp @@ -786,6 +786,10 @@ case SymbolBody::DefinedRegularKind: { const auto &DR = cast>(S); InputSectionBase &SC = DR.Section; + // Symbol offsets for AMDGPU need to be the offset in bytes of the symbol + // from the beginning of the section. + if (Config->EMachine == EM_AMDGPU) + return SC.getOffset(DR.Sym); if (DR.Sym.getType() == STT_TLS) return SC.OutSec->getVA() + SC.getOffset(DR.Sym) - Out::TlsPhdr->p_vaddr; @@ -1289,7 +1293,11 @@ continue; const OutputSectionBase *OutSec = Section->OutSec; ESym->st_shndx = OutSec->SectionIndex; - VA += OutSec->getVA() + Section->getOffset(Sym); + VA = Section->getOffset(Sym); + // Symbol offsets for AMDGPU need to be the offset in bytes of the + // symbol from the beginning of the section.
+ if (Config->EMachine != EM_AMDGPU) + VA += OutSec->getVA(); } ESym->st_name = StrTabSec.getOffset(SymName); ESym->st_size = Sym.st_size; Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -177,6 +177,22 @@ uint8_t *PairedLoc = nullptr) const override; }; +class AMDGPUTargetInfo final : public TargetInfo { +public: + AMDGPUTargetInfo(); + void writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const override; + void writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const override; + void writePltEntry(uint8_t *Buf, uint64_t GotAddr, uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const override; + bool relocNeedsGot(uint32_t Type, const SymbolBody &S) const override; + bool relocNeedsPlt(uint32_t Type, const SymbolBody &S) const override; + void relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, uint64_t P, + uint64_t SA, uint64_t ZA = 0, + uint8_t *PairedLoc = nullptr) const override; +}; + template class MipsTargetInfo final : public TargetInfo { public: MipsTargetInfo(); @@ -201,6 +217,8 @@ return new X86TargetInfo(); case EM_AARCH64: return new AArch64TargetInfo(); + case EM_AMDGPU: + return new AMDGPUTargetInfo(); case EM_MIPS: switch (Config->EKind) { case ELF32LEKind: @@ -1251,6 +1269,38 @@ } } +AMDGPUTargetInfo::AMDGPUTargetInfo() {} + +void AMDGPUTargetInfo::writeGotPltEntry(uint8_t *Buf, uint64_t Plt) const { + llvm_unreachable("not implemented"); +} + +void AMDGPUTargetInfo::writePltZeroEntry(uint8_t *Buf, uint64_t GotEntryAddr, + uint64_t PltEntryAddr) const { + llvm_unreachable("not implemented"); +} + +void AMDGPUTargetInfo::writePltEntry(uint8_t *Buf, uint64_t GotAddr, + uint64_t GotEntryAddr, + uint64_t PltEntryAddr, int32_t Index, + unsigned RelOff) const { + llvm_unreachable("not implemented"); +} + +bool AMDGPUTargetInfo::relocNeedsGot(uint32_t Type, const SymbolBody &S) const { + return false; +} + 
+bool AMDGPUTargetInfo::relocNeedsPlt(uint32_t Type, const SymbolBody &S) const { + return false; +} + +void AMDGPUTargetInfo::relocateOne(uint8_t *Loc, uint8_t *BufEnd, uint32_t Type, + uint64_t P, uint64_t SA, uint64_t ZA, + uint8_t *PairedLoc) const { + llvm_unreachable("not implemented"); +} + template MipsTargetInfo::MipsTargetInfo() { PageSize = 65536; GotHeaderEntriesNum = 2; Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -837,6 +837,22 @@ return Ret; } +template static uint32_t getPhdrTy(OutputSectionBase *Sec) { + if (Config->EMachine != EM_AMDGPU) + return PT_LOAD; + + // For AMDGPU we need to use custom segment kinds in order to specify which + // address space data should be loaded into. + uint32_t Flags = Sec->getFlags(); + if (Flags & SHF_AMDGPU_HSA_CODE) + return PT_AMDGPU_HSA_LOAD_CODE_AGENT; + + if ((Flags & SHF_AMDGPU_HSA_GLOBAL) && !(Flags & SHF_AMDGPU_HSA_AGENT)) + return PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM; + + return PT_LOAD; +} + template void Writer::updateRelro(Elf_Phdr *Cur, Elf_Phdr *GnuRelroPhdr, OutputSectionBase *Sec, uintX_t VA) { @@ -887,7 +903,8 @@ VA = RoundUpToAlignment(VA, Target->getPageSize()); FileOff = RoundUpToAlignment(FileOff, Target->getPageSize()); Elf_Phdr *PH = &Phdrs[++PhdrIdx]; - setPhdr(PH, PT_LOAD, Flags, FileOff, VA, 0, Target->getPageSize()); + uint32_t PTType = getPhdrTy(Sec); + setPhdr(PH, PTType, Flags, FileOff, VA, 0, Target->getPageSize()); } if (Sec->getFlags() & SHF_TLS) { Index: test/ELF/amdgpu-globals.s =================================================================== --- /dev/null +++ test/ELF/amdgpu-globals.s @@ -0,0 +1,138 @@ +# RUN: llvm-mc -filetype=obj -triple=amdgcpu--amdgcn -mcpu=kaveri %s -o %t.o +# RUN: lld -flavor gnu %t.o -o %t +# RUN: llvm-readobj -sections -symbols -program-headers %t | FileCheck %s + +# Requires: amdgpu + + .amdgpu_hsa_module_global module_global_program + .size module_global_program, 4 + 
.hsadata_global_program +module_global_program: + .long 0 ; 0x0 + + .amdgpu_hsa_program_global program_global_program + .size program_global_program, 4 + .hsadata_global_program +program_global_program: + .long 0 ; 0x0 + + .amdgpu_hsa_module_global module_global_agent + .size module_global_agent, 4 + .hsadata_global_agent +module_global_agent: + .long 0 ; 0x0 + + .amdgpu_hsa_program_global program_global_agent + .size program_global_agent, 4 + .hsadata_global_agent +program_global_agent: + .long 0 ; 0x0 + + .amdgpu_hsa_module_global module_global_readonly + .size module_global_readonly, 4 + .hsatext +module_global_readonly: + .long 0 ; 0x0 + + .amdgpu_hsa_program_global program_global_readonly + .size program_global_readonly, 4 + .hsatext +program_global_readonly: + .long 0 ; 0x0 + +# CHECK: Section { +# CHECK: Name: .hsadata_global_program +# CHECK: Type: SHT_PROGBITS (0x1) +# CHECK: Flags [ (0x100003) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: Address: [[HSADATA_GLOBAL_PROGRAM_ADDR:[0-9xa-f]+]] +# CHECK: } + +# CHECK: Section { +# CHECK: Name: .hsadata_global_agent +# CHECK: Type: SHT_PROGBITS (0x1) +# CHECK: Flags [ (0x900003) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000) +# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: } + +# CHECK: Section { +# CHECK: Name: .hsatext +# CHECK: Type: SHT_PROGBITS +# CHECK: Flags [ (0xC00007) +# CHECK: SHF_ALLOC (0x2) +# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000) +# CHECK: SHF_AMDGPU_HSA_CODE (0x400000) +# CHECK: SHF_EXECINSTR (0x4) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: Address: [[HSATEXT_ADDR:[0-9xa-f]+]] +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: module_global_agent +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Local +# CHECK: Section: .hsadata_global_agent +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: module_global_program +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# 
CHECK: Binding: Local +# CHECK: Section: .hsadata_global_program +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: module_global_readonly +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Local +# CHECK: Type: Object +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: program_global_agent +# CHECK: Value: 0x4 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: Object +# CHECK: Section: .hsadata_global_agent +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: program_global_program +# CHECK: Value: 0x4 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: Object +# CHECK: Section: .hsadata_global_program +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: program_global_readonly +# CHECK: Value: 0x4 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: Object +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: ProgramHeader { +# CHECK: Type: PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM +# CHECK: VirtualAddress: [[HSADATA_GLOBAL_PROGRAM_ADDR]] +# CHECK: } + +# CHECK: ProgramHeader { +# CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT +# CHECK: VirtualAddress: [[HSATEXT_ADDR]] +# CHECK: } Index: test/ELF/amdgpu-kernels.s =================================================================== --- /dev/null +++ test/ELF/amdgpu-kernels.s @@ -0,0 +1,61 @@ +# RUN: llvm-mc -filetype=obj -triple=amdgcpu--amdgcn -mcpu=kaveri %s -o %t.o +# RUN: lld -flavor gnu %t.o -o %t +# RUN: llvm-readobj -sections -symbols -program-headers %t | FileCheck %s + +.hsa_code_object_version 1,0 +.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" + +.hsatext +.globl kernel0 +.align 256 +.amdgpu_hsa_kernel kernel0 +kernel0: + s_endpgm +.Lfunc_end0: + .size kernel0, .Lfunc_end0-kernel0 + +.globl kernel1 +.align 256 +.amdgpu_hsa_kernel kernel1 +kernel1: + s_endpgm + s_endpgm +.Lfunc_end1: + .size kernel1, .Lfunc_end1-kernel1 + + +# CHECK: Section { +# CHECK: Name: .hsatext +# CHECK: Type: SHT_PROGBITS +# CHECK: Flags [ (0xC00007) +# CHECK: SHF_ALLOC (0x2) +# CHECK: 
SHF_AMDGPU_HSA_AGENT (0x800000) +# CHECK: SHF_AMDGPU_HSA_CODE (0x400000) +# CHECK: SHF_EXECINSTR (0x4) +# CHECK: SHF_WRITE (0x1) +# CHECK: ] +# CHECK: Address: [[HSATEXT_ADDR:[0-9xa-f]+]] +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: kernel0 +# CHECK: Value: 0x0 +# CHECK: Size: 4 +# CHECK: Binding: Global +# CHECK: Type: AMDGPU_HSA_KERNEL +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: Symbol { +# CHECK: Name: kernel1 +# CHECK: Value: 0x100 +# CHECK: Size: 8 +# CHECK: Binding: Global +# CHECK: Type: AMDGPU_HSA_KERNEL +# CHECK: Section: .hsatext +# CHECK: } + +# CHECK: ProgramHeader { +# CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT +# CHECK: VirtualAddress: [[HSATEXT_ADDR]] +# CHECK: }