Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -45,6 +45,12 @@ if (Target.getSymA()->getSymbol().getName() == "SCRATCH_RSRC_DWORD1") return ELF::R_AMDGPU_ABS32_HI; + switch (Fixup.getKind()) { + default: break; + case FK_PCRel_4: + return ELF::R_AMDGPU_REL32; + } + llvm_unreachable("unhandled relocation type"); } Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1420,7 +1420,8 @@ SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); SDLoc DL(GSD); Index: llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll +++ llvm/trunk/test/CodeGen/AMDGPU/global-zero-initializer.ll @@ -1,12 +1,17 @@ -; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s -; RUN: not llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s -; CHECK: in function load_init_global_global{{.*}}: unsupported initializer for address space - -@lds = addrspace(1) global [256 x i32] zeroinitializer +; CHECK: {{^}}load_init_global_global: +; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], global+4 +; CHECK: s_addc_u32 
s5, s[[PC_HI]], 0 +; CHECK: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[ADDR_LO]]:7], 0 offset:40 +; CHECK: global: +; CHECK: .zero 1024 +@global = addrspace(1) global [256 x i32] zeroinitializer define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) { - %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @lds, i32 0, i32 10 + %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @global, i32 0, i32 10 %ld = load i32, i32 addrspace(1)* %gep store i32 %ld, i32 addrspace(1)* %out ret void Index: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll +++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll @@ -40,6 +40,7 @@ ;CHECK: s_and_b64 exec, exec, [[ORIG]] ;CHECK: store ;CHECK-NOT: exec +;CHECK: .size test3 define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) { main_body: %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)