Index: docs/CodeGenerator.rst =================================================================== --- docs/CodeGenerator.rst +++ docs/CodeGenerator.rst @@ -2643,3 +2643,50 @@ a limited number of kernel function calls. Prior to running an eBPF program, a verifier performs static analysis to prevent loops in the code and to ensure valid register usage and operand types. + +The AMDGPU backend +------------------ + +The AMDGPU code generator lives in the lib/Target/AMDGPU directory, and is an +open source native AMD GCN ISA code generator. + +Target triples supported +^^^^^^^^^^^^^^^^^^^^^^^^ + +The following are the known target triples that are supported by the AMDGPU +backend. + +* **amdgcn--** --- AMD GCN GPUs (AMDGPU.7.0.0+) +* **amdgcn--amdhsa** --- AMD GCN GPUs (AMDGPU.7.0.0+) with HSA support +* **r600--** --- AMD GPUs HD2XXX-HD6XXX + +Relocations +^^^^^^^^^^^ + +Supported relocatable fields are: + +* **word32** --- This specifies a 32-bit field occupying 4 bytes with arbitrary + byte alignment. These values use the same byte order as other word values in + the AMD GPU architecture. +* **word64** --- This specifies a 64-bit field occupying 8 bytes with arbitrary + byte alignment. 
These values use the same byte order as other word values in + the AMD GPU architecture. + +The following notations are used for specifying relocation types: + +* **A** --- Represents the addend used to compute the value of the relocatable + field. +* **S** --- Represents the value of the symbol whose index resides in the + relocation entry. + +The AMDGPU backend generates *Elf64_Rela* relocation records with the following +supported relocation types: + + ==================== ===== ========== ============================ + Relocation type Value Field Calculation + ==================== ===== ========== ============================ + ``R_AMDGPU_NONE`` 0 ``none`` ``none`` + ``R_AMDGPU_32_LOW`` 1 ``word32`` (S + A) & 0xFFFFFFFF + ``R_AMDGPU_32_HIGH`` 2 ``word32`` ((S + A) >> 32) & 0xFFFFFFFF + ``R_AMDGPU_64`` 3 ``word64`` (S + A) & 0xFFFFFFFFFFFFFFFF + ==================== ===== ========== ============================ Index: include/llvm/MC/MCExpr.h =================================================================== --- include/llvm/MC/MCExpr.h +++ include/llvm/MC/MCExpr.h @@ -266,6 +266,10 @@ VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr + VK_AMDGPU_32_LO, + VK_AMDGPU_32_HI, + VK_AMDGPU_64, + VK_TPREL, VK_DTPREL }; Index: include/llvm/Object/RelocVisitor.h =================================================================== --- include/llvm/Object/RelocVisitor.h +++ include/llvm/Object/RelocVisitor.h @@ -139,6 +139,14 @@ HasError = true; return RelocToApply(); } + case Triple::amdgcn: + switch (RelocType) { + case llvm::ELF::R_AMDGPU_32_LOW: + return visitELF_AMDGPU_32_LOW(R, Value); + default: + HasError = true; + return RelocToApply(); + } default: HasError = true; return RelocToApply(); @@ -403,6 +411,12 @@ return RelocToApply(static_cast<uint32_t>(Res), 4); } + // AMDGPU ELF + RelocToApply visitELF_AMDGPU_32_LOW(RelocationRef R, uint64_t Value) { + int64_t Addend = getELFAddend(R); + return RelocToApply((Value + Addend) & 0xFFFFFFFF, 4); + } + /// I386 COFF RelocToApply 
visitCOFF_I386_SECREL(RelocationRef R, uint64_t Value) { return RelocToApply(static_cast<uint32_t>(Value), /*Width=*/4); Index: include/llvm/Support/ELF.h =================================================================== --- include/llvm/Support/ELF.h +++ include/llvm/Support/ELF.h @@ -614,6 +614,11 @@ #include "ELFRelocs/WebAssembly.def" }; +// ELF Relocation types for AMDGPU +enum { +#include "ELFRelocs/AMDGPU.def" +}; + #undef ELF_RELOC // Section header. Index: include/llvm/Support/ELFRelocs/AMDGPU.def =================================================================== --- include/llvm/Support/ELFRelocs/AMDGPU.def +++ include/llvm/Support/ELFRelocs/AMDGPU.def @@ -0,0 +1,8 @@ +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_AMDGPU_NONE, 0) +ELF_RELOC(R_AMDGPU_32_LOW, 1) +ELF_RELOC(R_AMDGPU_32_HIGH, 2) +ELF_RELOC(R_AMDGPU_64, 3) Index: lib/MC/MCExpr.cpp =================================================================== --- lib/MC/MCExpr.cpp +++ lib/MC/MCExpr.cpp @@ -277,6 +277,9 @@ case VK_Hexagon_IE: return "IE"; case VK_Hexagon_IE_GOT: return "IEGOT"; case VK_WebAssembly_FUNCTION: return "FUNCTION"; + case VK_AMDGPU_32_LO: return "32_LO"; + case VK_AMDGPU_32_HI: return "32_HI"; + case VK_AMDGPU_64: return "64"; } llvm_unreachable("Invalid variant kind"); } @@ -374,6 +377,9 @@ .Case("prel31", VK_ARM_PREL31) .Case("sbrel", VK_ARM_SBREL) .Case("tlsldo", VK_ARM_TLSLDO) + .Case("32_lo", VK_AMDGPU_32_LO) + .Case("32_hi", VK_AMDGPU_32_HI) + .Case("64", VK_AMDGPU_64) .Default(VK_Invalid); } Index: lib/Object/ELF.cpp =================================================================== --- lib/Object/ELF.cpp +++ lib/Object/ELF.cpp @@ -105,6 +105,13 @@ break; } break; + case ELF::EM_AMDGPU: + switch (Type) { +#include "llvm/Support/ELFRelocs/AMDGPU.def" + default: + break; + } + break; default: break; } Index: lib/ObjectYAML/ELFYAML.cpp =================================================================== --- lib/ObjectYAML/ELFYAML.cpp +++ 
lib/ObjectYAML/ELFYAML.cpp @@ -531,6 +531,9 @@ case ELF::EM_LANAI: #include "llvm/Support/ELFRelocs/Lanai.def" break; + case ELF::EM_AMDGPU: +#include "llvm/Support/ELFRelocs/AMDGPU.def" + break; default: llvm_unreachable("Unsupported architecture"); } Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "AMDGPUMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCValue.h" using namespace llvm; @@ -21,10 +22,7 @@ AMDGPUELFObjectWriter(bool Is64Bit); protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const override { - return Fixup.getKind(); - } - + const MCFixup &Fixup, bool IsPCRel) const override; }; @@ -32,7 +30,27 @@ AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit) : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA, - ELF::EM_AMDGPU, false) { } + ELF::EM_AMDGPU, true) { } + +unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); + switch (Modifier) { + case MCSymbolRefExpr::VK_None: + switch (Fixup.getKind()) { + case FK_Data_4: + return ELF::R_AMDGPU_32_LOW; + default: + break; + } + break; + default: + break; + } + llvm_unreachable("unhandled relocation type"); +} MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) { MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit); Index: test/MC/AMDGPU/elf64-relocs.s =================================================================== --- test/MC/AMDGPU/elf64-relocs.s +++ test/MC/AMDGPU/elf64-relocs.s @@ -0,0 +1,220 @@ +; RUN: llvm-mc -triple amdgcn--amdhsa -filetype obj -o - %s | 
llvm-readobj -r - | FileCheck %s + +; CHECK: Section (10) .rela.debug_info { +; CHECK: 0x6 R_AMDGPU_32_LOW .debug_abbrev 0x0 +; CHECK: 0xC R_AMDGPU_32_LOW .debug_str 0x0 +; CHECK: 0x12 R_AMDGPU_32_LOW .debug_str 0x23 +; CHECK: 0x16 R_AMDGPU_32_LOW .debug_line 0x0 +; CHECK: 0x1A R_AMDGPU_32_LOW .debug_str 0x32 +; CHECK: 0x1F R_AMDGPU_32_LOW .text 0x0 +; CHECK: 0x23 R_AMDGPU_32_LOW .text 0x104 +; CHECK: 0x28 R_AMDGPU_32_LOW .text 0x0 +; CHECK: 0x2C R_AMDGPU_32_LOW .text 0x104 +; CHECK: 0x30 R_AMDGPU_32_LOW .debug_str 0x51 +; CHECK: } +; CHECK: Section (14) .rela.debug_pubnames { +; CHECK: 0x6 R_AMDGPU_32_LOW .debug_info 0x0 +; CHECK: } +; CHECK: Section (18) .rela.debug_line { +; CHECK: 0x32 R_AMDGPU_32_LOW .text 0x0 +; CHECK: } + + .text + .hsa_code_object_version 2,0 + .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" + .section .AMDGPU.config + .text + .globl relocs_test + .p2align 8 + .type relocs_test,@function + .amdgpu_hsa_kernel relocs_test +relocs_test: ; @relocs_test +.Lfunc_begin0: + .file 1 "relocs-test.cl" + .loc 1 1 0 ; relocs-test.cl:1:0 + .amd_kernel_code_t + kernel_code_version_major = 1 + kernel_code_version_minor = 0 + machine_kind = 1 + machine_version_major = 8 + machine_version_minor = 0 + machine_version_stepping = 3 + kernel_code_entry_byte_offset = 256 + kernel_code_prefetch_byte_size = 0 + max_scratch_backing_memory_byte_size = 0 + compute_pgm_rsrc1_vgprs = 0 + compute_pgm_rsrc1_sgprs = 0 + compute_pgm_rsrc1_priority = 0 + compute_pgm_rsrc1_float_mode = 192 + compute_pgm_rsrc1_priv = 0 + compute_pgm_rsrc1_dx10_clamp = 1 + compute_pgm_rsrc1_debug_mode = 0 + compute_pgm_rsrc1_ieee_mode = 0 + compute_pgm_rsrc2_scratch_en = 0 + compute_pgm_rsrc2_user_sgpr = 6 + compute_pgm_rsrc2_tgid_x_en = 1 + compute_pgm_rsrc2_tgid_y_en = 0 + compute_pgm_rsrc2_tgid_z_en = 0 + compute_pgm_rsrc2_tg_size_en = 0 + compute_pgm_rsrc2_tidig_comp_cnt = 0 + compute_pgm_rsrc2_excp_en_msb = 0 + compute_pgm_rsrc2_lds_size = 0 + compute_pgm_rsrc2_excp_en = 0 + 
enable_sgpr_private_segment_buffer = 1 + enable_sgpr_dispatch_ptr = 0 + enable_sgpr_queue_ptr = 0 + enable_sgpr_kernarg_segment_ptr = 1 + enable_sgpr_dispatch_id = 0 + enable_sgpr_flat_scratch_init = 0 + enable_sgpr_private_segment_size = 0 + enable_sgpr_grid_workgroup_count_x = 0 + enable_sgpr_grid_workgroup_count_y = 0 + enable_sgpr_grid_workgroup_count_z = 0 + enable_ordered_append_gds = 0 + private_element_size = 1 + is_ptr64 = 1 + is_dynamic_callstack = 0 + is_debug_enabled = 0 + is_xnack_enabled = 0 + workitem_private_segment_byte_size = 0 + workgroup_group_segment_byte_size = 0 + gds_segment_byte_size = 0 + kernarg_segment_byte_size = 0 + workgroup_fbarrier_count = 0 + wavefront_sgpr_count = 1 + workitem_vgpr_count = 1 + reserved_vgpr_first = 0 + reserved_vgpr_count = 0 + reserved_sgpr_first = 0 + reserved_sgpr_count = 0 + debug_wavefront_private_segment_offset_sgpr = 0 + debug_private_segment_buffer_sgpr = 0 + kernarg_segment_alignment = 4 + group_segment_alignment = 4 + private_segment_alignment = 4 + wavefront_size = 6 + call_convention = 0 + runtime_loader_kernel_symbol = 0 + .end_amd_kernel_code_t +; BB#0: ; %entry + .loc 1 1 28 prologue_end ; relocs-test.cl:1:28 + s_endpgm +.Ltmp0: +.Lfunc_end0: + .size relocs_test, .Lfunc_end0-relocs_test + + .section .AMDGPU.csdata +; Kernel info: +; codeLenInByte = 4 +; NumSgprs: 1 +; NumVgprs: 1 +; FloatMode: 192 +; IeeeMode: 0 +; ScratchSize: 0 +; LDSByteSize: 0 bytes/workgroup (compile time only) +; ReservedVGPRFirst: 0 +; ReservedVGPRCount: 0 +; COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 3.9.0 (trunk 270072)" ; string offset=0 +.Linfo_string1: + .asciz "relocs-test.cl" ; string offset=35 +.Linfo_string2: + .asciz "/home/kzhuravl/Sandbox/testing" ; string offset=50 +.Linfo_string3: + .asciz "relocs_test" ; 
string offset=81 + .section .debug_loc + .section .debug_abbrev +.Lsection_abbrev: + .byte 1 ; Abbreviation Code + .byte 17 ; DW_TAG_compile_unit + .byte 1 ; DW_CHILDREN_yes + .byte 37 ; DW_AT_producer + .byte 14 ; DW_FORM_strp + .byte 19 ; DW_AT_language + .byte 5 ; DW_FORM_data2 + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 16 ; DW_AT_stmt_list + .byte 6 ; DW_FORM_data4 + .byte 27 ; DW_AT_comp_dir + .byte 14 ; DW_FORM_strp + .ascii "\341\177" ; DW_AT_APPLE_optimized + .byte 12 ; DW_FORM_flag + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 1 ; DW_FORM_addr + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 2 ; Abbreviation Code + .byte 46 ; DW_TAG_subprogram + .byte 0 ; DW_CHILDREN_no + .byte 17 ; DW_AT_low_pc + .byte 1 ; DW_FORM_addr + .byte 18 ; DW_AT_high_pc + .byte 1 ; DW_FORM_addr + .byte 3 ; DW_AT_name + .byte 14 ; DW_FORM_strp + .byte 58 ; DW_AT_decl_file + .byte 11 ; DW_FORM_data1 + .byte 59 ; DW_AT_decl_line + .byte 11 ; DW_FORM_data1 + .byte 63 ; DW_AT_external + .byte 12 ; DW_FORM_flag + .ascii "\341\177" ; DW_AT_APPLE_optimized + .byte 12 ; DW_FORM_flag + .byte 0 ; EOM(1) + .byte 0 ; EOM(2) + .byte 0 ; EOM(3) + .section .debug_info +.Lsection_info: +.Lcu_begin0: + .long 53 ; Length of Unit + .short 2 ; DWARF version number + .long .Lsection_abbrev ; Offset Into Abbrev. 
Section + .byte 4 ; Address Size (in bytes) + .byte 1 ; Abbrev [1] 0xb:0x2e DW_TAG_compile_unit + .long .Linfo_string0 ; DW_AT_producer + .short 12 ; DW_AT_language + .long .Linfo_string1 ; DW_AT_name + .long .Lline_table_start0 ; DW_AT_stmt_list + .long .Linfo_string2 ; DW_AT_comp_dir + .byte 1 ; DW_AT_APPLE_optimized + .long .Lfunc_begin0 ; DW_AT_low_pc + .long .Lfunc_end0 ; DW_AT_high_pc + .byte 2 ; Abbrev [2] 0x27:0x11 DW_TAG_subprogram + .long .Lfunc_begin0 ; DW_AT_low_pc + .long .Lfunc_end0 ; DW_AT_high_pc + .long .Linfo_string3 ; DW_AT_name + .byte 1 ; DW_AT_decl_file + .byte 1 ; DW_AT_decl_line + .byte 1 ; DW_AT_external + .byte 1 ; DW_AT_APPLE_optimized + .byte 0 ; End Of Children Mark + .section .debug_ranges +.Ldebug_range: + .section .debug_macinfo +.Ldebug_macinfo: +.Lcu_macro_begin0: + .byte 0 ; End Of Macro List Mark + .section .debug_pubnames + .long .LpubNames_end0-.LpubNames_begin0 ; Length of Public Names Info +.LpubNames_begin0: + .short 2 ; DWARF Version + .long .Lcu_begin0 ; Offset of Compilation Unit Info + .long 57 ; Compilation Unit Length + .long 39 ; DIE offset + .asciz "relocs_test" ; External Name + .long 0 ; End Mark +.LpubNames_end0: + + .ident "clang version 3.9.0 (trunk 270072)" + .section ".note.GNU-stack" + .section .debug_line +.Lline_table_start0: Index: test/Object/AMDGPU/elf64-relocs-visitor.test =================================================================== --- test/Object/AMDGPU/elf64-relocs-visitor.test +++ test/Object/AMDGPU/elf64-relocs-visitor.test @@ -0,0 +1,7 @@ +RUN: llvm-dwarfdump -debug-dump=info %p/../Inputs/elf64-amdgpu-relocs-visitor.o 2>&1 | FileCheck %s + +CHECK: DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = "clang version 3.9.0 (trunk 270072)") +CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000023] = "relocs-test.cl") +CHECK: DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x00000032] = "/home/kzhuravl/Sandbox/testing") +CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000051] = "relocs_test") 
+CHECK: DW_AT_decl_file [DW_FORM_data1] ("/home/kzhuravl/Sandbox/testing/relocs-test.cl") Index: test/Object/AMDGPU/elf64-relocs.yaml =================================================================== --- test/Object/AMDGPU/elf64-relocs.yaml +++ test/Object/AMDGPU/elf64-relocs.yaml @@ -0,0 +1,53 @@ +# RUN: yaml2obj -format=elf %s > %t +# RUN: llvm-readobj -r %t | FileCheck %s + +# CHECK: Relocations [ +# CHECK: Section (2) .rela.text { +# CHECK: 0x0 R_AMDGPU_NONE main 0x0 +# CHECK: 0x8 R_AMDGPU_32_LOW - 0x0 +# CHECK: 0x10 R_AMDGPU_32_HIGH - 0x0 +# CHECK: 0x18 R_AMDGPU_64 - 0x0 +# CHECK: } +# CHECK: ] + +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_AMDGPU +Sections: + - Type: SHT_PROGBITS + Name: .text + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x08 + Content: 0000000000000000 + - Type: SHT_RELA + Name: .rela.text + Link: .symtab + Info: .text + AddressAlign: 0x08 + Relocations: + - Offset: 0x0 + Symbol: main + Type: R_AMDGPU_NONE + - Offset: 0x8 + Symbol: a + Type: R_AMDGPU_32_LOW + - Offset: 0x10 + Symbol: b + Type: R_AMDGPU_32_HIGH + - Offset: 0x18 + Symbol: c + Type: R_AMDGPU_64 + +Symbols: + Local: + - Name: .text + Type: STT_SECTION + Section: .text + + Global: + - Name: main + Type: STT_FUNC + Section: .text + Size: 0x08