AMDGPU/SI: emit relocations for constant-address-space globals on amdhsa.

Summary of what the hunks below do:
  * SIISelLowering: on amdhsa, a CONSTANT_ADDRESS global is lowered to two
    S_MOV_B32 nodes whose target-global-address operands carry
    ELF::R_AMDGPU_32_LO / ELF::R_AMDGPU_32_HI as target flags, joined with
    BUILD_PAIR into an i64.
  * AMDGPUMCInstLower: the operand's target flags are translated into an
    MCSymbolRefExpr variant kind.
  * SIMCCodeEmitter: VK_AMDGPU_32_LO/HI select the new fixup_amdgpu_lo32 /
    fixup_amdgpu_hi32 fixup kinds.
  * AMDGPUELFObjectWriter: the new fixups map to R_AMDGPU_32_LO/HI and are
    always relocated against the symbol.
  * AMDGPUAsmPrinter: EndOfTextLabel is no longer emitted for amdhsa.

Review changes relative to the submitted revision:
  * AMDGPUAsmBackend: keep llvm_unreachable for unknown fixup kinds and add
    explicit zero-size cases for the two new fixups instead of a blanket
    "return 0". Assumes the file already includes AMDGPUFixupKinds.h (it
    must, if it names the existing AMDGPU fixups) - TODO confirm.
  * AMDGPUELFObjectWriter: drop redundant 'virtual' next to 'override', use
    static_cast instead of a C-style cast.
  * SIISelLowering: do not reorder the existing DL/GV declarations.
  * AMDGPUMCInstLower: restore the '#include <algorithm>' context line that
    had lost its operand (header name to be confirmed against the tree).
  * Document the new fixup enumerators and getVariantKind().
  * hsa-globals.ll: last hunk split in two so that it does not depend on
    context that is not visible in the submitted diff.
NOTE(review): leading whitespace of the C++ lines was reconstructed; apply
with 'patch -l' or regenerate against the tree if a hunk is rejected.

Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -21,6 +21,7 @@
 #include "InstPrinter/AMDGPUInstPrinter.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
+#include "AMDGPUHSATargetObjectFile.h"
 #include "AMDKernelCodeT.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
@@ -102,12 +103,14 @@
 
 void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
 
-  // This label is used to mark the end of the .text section.
-  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-  OutStreamer->SwitchSection(TLOF.getTextSection());
-  MCSymbol *EndOfTextLabel =
-      OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-  OutStreamer->EmitLabel(EndOfTextLabel);
+  if (Triple(M.getTargetTriple()).getOS() != Triple::AMDHSA) {
+    // This label is used to mark the end of the .text section.
+    const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+    OutStreamer->SwitchSection(TLOF.getTextSection());
+    MCSymbol *EndOfTextLabel =
+        OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
+    OutStreamer->EmitLabel(EndOfTextLabel);
+  }
 }
 
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
Index: lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -30,6 +30,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include <algorithm>
@@ -40,6 +41,21 @@
   Ctx(ctx), ST(st)
 { }
 
+/// Translate the ELF relocation type carried in a machine operand's target
+/// flags into the matching MCSymbolRefExpr variant kind (VK_None otherwise).
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned RT) {
+  switch (RT) {
+  default: return MCSymbolRefExpr::VK_None;
+  case ELF::R_AMDGPU_32_LO: return MCSymbolRefExpr::VK_AMDGPU_32_LO;
+  case ELF::R_AMDGPU_32_HI: return MCSymbolRefExpr::VK_AMDGPU_32_HI;
+  case ELF::R_AMDGPU_64: return MCSymbolRefExpr::VK_AMDGPU_64;
+  case ELF::R_AMDGPU_INIT_SAMPLER:
+    return MCSymbolRefExpr::VK_AMDGPU_INIT_SAMPLER;
+  case ELF::R_AMDGPU_INIT_IMAGE:
+    return MCSymbolRefExpr::VK_AMDGPU_INIT_IMAGE;
+  }
+}
+
 void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
 
   int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
@@ -70,7 +86,8 @@
     case MachineOperand::MO_GlobalAddress: {
       const GlobalValue *GV = MO.getGlobal();
       MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
-      MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
+      MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
+          Sym, getVariantKind(MO.getTargetFlags()), Ctx));
       break;
     }
     case MachineOperand::MO_TargetIndex: {
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -82,7 +82,11 @@
   case FK_Data_8:
     return 8;
+  case AMDGPU::fixup_amdgpu_lo32:
+  case AMDGPU::fixup_amdgpu_hi32:
+    // These become R_AMDGPU_32_LO/HI relocations; report no in-place bytes.
+    return 0;
   default:
     llvm_unreachable("Unknown fixup kind!");
   }
 }
 
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -9,6 +9,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMCTargetDesc.h"
+#include "AMDGPU.h"
+#include "AMDGPUFixupKinds.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixup.h"
 
@@ -19,12 +21,24 @@
 class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
 public:
   AMDGPUELFObjectWriter(bool Is64Bit);
+  // R_AMDGPU_32_LO/HI are always emitted against the symbol itself.
+  bool needsRelocateWithSymbol(const MCSymbol &Sym,
+                               unsigned Type) const override {
+    switch (Type) {
+    default: return false;
+    case ELF::R_AMDGPU_32_LO:
+    case ELF::R_AMDGPU_32_HI: return true;
+    }
+  }
 protected:
   unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                         bool IsPCRel) const override {
-    return Fixup.getKind();
+    switch (static_cast<unsigned>(Fixup.getKind())) {
+    default: return Fixup.getKind();
+    case AMDGPU::fixup_amdgpu_lo32: return ELF::R_AMDGPU_32_LO;
+    case AMDGPU::fixup_amdgpu_hi32: return ELF::R_AMDGPU_32_HI;
+    }
   }
-
 };
 
 
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -24,6 +24,11 @@
   /// fixup for offset from instruction to end of text section
   fixup_si_end_of_text,
 
+  /// fixup emitted as R_AMDGPU_32_LO (low half of a 64-bit global address)
+  fixup_amdgpu_lo32,
+  /// fixup emitted as R_AMDGPU_32_HI (high half of a 64-bit global address)
+  fixup_amdgpu_hi32,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -254,13 +254,22 @@
     const MCSymbol *Sym =
         Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
 
-    if (&Expr->getSymbol() == Sym) {
-      // Add the offset to the beginning of the constant values.
-      Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
-    } else {
-      // This is used for constant data stored in .rodata.
-      Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
-    }
+    switch (Expr->getKind()) {
+    case MCSymbolRefExpr::VK_AMDGPU_32_LO:
+      Kind = (MCFixupKind)AMDGPU::fixup_amdgpu_lo32;
+      break;
+    case MCSymbolRefExpr::VK_AMDGPU_32_HI:
+      Kind = (MCFixupKind)AMDGPU::fixup_amdgpu_hi32;
+      break;
+    default:
+      if (&Expr->getSymbol() == Sym) {
+        // Add the offset to the beginning of the constant values.
+        Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
+      } else {
+        // This is used for constant data stored in .rodata.
+        Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
+      }
+    }
     Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
   }
 
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -32,6 +32,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/ADT/SmallString.h"
 
 using namespace llvm;
@@ -969,8 +970,22 @@
   if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
 
   SDLoc DL(GSD);
   const GlobalValue *GV = GSD->getGlobal();
+
+  if (Subtarget->isAmdHsaOS()) {
+    SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
+                                            ELF::R_AMDGPU_32_LO);
+    SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
+                                            ELF::R_AMDGPU_32_HI);
+
+    // We need to insert S_MOV_B32 nodes, because the InstEmitter will crash
+    // if a global address ends up in a target independent node.
+    Lo = SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Lo), 0);
+    Hi = SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Hi), 0);
+    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+  }
+
   MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
 
   SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
Index: test/CodeGen/AMDGPU/hsa-globals.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-globals.ll
+++ test/CodeGen/AMDGPU/hsa-globals.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd -relocations | FileCheck --check-prefix=ELF %s
 
-; FIXME: Add assembler support for globals
-; FIXME-XUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
+; FIXME: Add assembler support for globals and relocations
+; FIXME-XUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s -sd -relocations | FileCheck %s --check-prefix=ELF
 
 @const_array = internal unnamed_addr addrspace(2) constant [64 x i8] c"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789<>", align 1
 
@@ -16,6 +16,12 @@
 ; ELF: SHF_AMDGPU_HSA_READONLY (0x200000)
 ; ELF: ]
 
+; ELF: Relocations [
+; ELF: Section ({{[0-9]+}}) .rel.hsatext {
+; ELF-DAG: R_AMDGPU_32_LO const_array
+; ELF-DAG: R_AMDGPU_32_HI const_array
+; ELF: }
+
 ; ELF: Symbol {
 ; ELF: Name: const_array
 ; ELF: Binding: Local (0x0)
@@ -23,3 +29,13 @@
 ; ELF: Section: .hsarodata_readonly_agent (0x6)
 ; ELF: }
 
+; HSA-LABEL: {{^}}test:
+; HSA-DAG: s_mov_b32 s[[LORELOC:[0-9]+]], const_array@LO
+; HSA-DAG: s_mov_b32 s[[HIRELOC:[0-9]+]], const_array@HI
+; HSA-VI: s_add_u32 s[[SPTRLO:[0-9]+]], s[[LORELOC]], s{{[0-9]+}}
+; HSA-VI: s_addc_u32 s[[SPTRHI:[0-9]+]], s[[HIRELOC]], s{{[0-9]+}}
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[SPTRLO]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[SPTRHI]]
+; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
+; HSA-CI: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s{{\[}}[[LORELOC]]:{{[0-9]+}}]
+; HSA: s_endpgm
@@ -31,4 +47,4 @@
-
+; HSA-NOT: EndOfTextLabel
 define void @test(i8 addrspace(1) *%out, i32 %offset) {
 entry:
   %ptr = getelementptr [64 x i8], [64 x i8] addrspace(2)* @const_array, i32 0, i32 %offset