Index: include/llvm/Object/RelocVisitor.h =================================================================== --- include/llvm/Object/RelocVisitor.h +++ include/llvm/Object/RelocVisitor.h @@ -143,12 +143,12 @@ switch (RelocType) { case llvm::ELF::R_AMDGPU_ABS32_LO: case llvm::ELF::R_AMDGPU_ABS32_HI: + case llvm::ELF::R_AMDGPU_REL32: return visitELF_AMDGPU_32(R, Value); default: HasError = true; return RelocToApply(); } - default: HasError = true; return RelocToApply(); Index: include/llvm/Support/ELFRelocs/AMDGPU.def =================================================================== --- include/llvm/Support/ELFRelocs/AMDGPU.def +++ include/llvm/Support/ELFRelocs/AMDGPU.def @@ -5,3 +5,4 @@ ELF_RELOC(R_AMDGPU_NONE, 0) ELF_RELOC(R_AMDGPU_ABS32_LO, 1) ELF_RELOC(R_AMDGPU_ABS32_HI, 2) +ELF_RELOC(R_AMDGPU_REL32, 4) Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -47,12 +47,13 @@ return ELF::R_AMDGPU_ABS32_LO; if (Target.getSymA()->getSymbol().getName() == "SCRATCH_RSRC_DWORD1") return ELF::R_AMDGPU_ABS32_HI; + case FK_PCRel_4: + return ELF::R_AMDGPU_REL32; } llvm_unreachable("unhandled relocation type"); } - MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend, raw_pwrite_stream &OS) { Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1421,7 +1421,8 @@ SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); SDLoc DL(GSD); Index: test/CodeGen/AMDGPU/global-zero-initializer.ll =================================================================== --- test/CodeGen/AMDGPU/global-zero-initializer.ll +++ test/CodeGen/AMDGPU/global-zero-initializer.ll @@ -1,12 +1,17 @@ -; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s -; CHECK: in function load_init_global_global{{.*}}: unsupported initializer for address space - -@lds = addrspace(1) global [256 x i32] zeroinitializer +; CHECK: {{^}}load_init_global_global: +; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], global+4 +; CHECK: s_addc_u32 s5, s[[PC_HI]], 0 +; CHECK: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[ADDR_LO]]:7], 0 offset:40 +; CHECK: global: +; CHECK: .zero 1024 +@global = addrspace(1) global [256 x i32] zeroinitializer define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) { - %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @lds, i32 0, i32 10 + %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @global, i32 0, i32 10 %ld = load i32, i32 addrspace(1)* %gep store i32 %ld, i32 addrspace(1)* %out ret void Index: test/CodeGen/AMDGPU/wqm.ll =================================================================== --- test/CodeGen/AMDGPU/wqm.ll +++ test/CodeGen/AMDGPU/wqm.ll @@ -40,6 +40,7 @@ ;CHECK: s_and_b64 exec, exec, [[ORIG]] ;CHECK: store ;CHECK-NOT: exec +;CHECK: .size test3 define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) { main_body: %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)