Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -106,7 +106,7 @@
   AMDGPUTargetStreamer *TS =
       static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
 
-  TS->EmitDirectiveHSACodeObjectVersion(2, 0);
+  TS->EmitDirectiveHSACodeObjectVersion(2, 1);
 
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
   TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
Index: lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -39,6 +39,15 @@
 AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
   Ctx(ctx), ST(st) { }
 
+/// Maps the target flags of a machine operand to the MC symbol-reference
+/// variant used when lowering it; unrecognized flags yield VK_None.
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
+  switch (MOFlags) {
+  default: return MCSymbolRefExpr::VK_None;
+  case SIInstrInfo::MO_GOTPCREL: return MCSymbolRefExpr::VK_GOTPCREL;
+  }
+}
+
 void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
 
   int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
@@ -69,7 +78,8 @@
     case MachineOperand::MO_GlobalAddress: {
       const GlobalValue *GV = MO.getGlobal();
       MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
-      const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
+      const MCExpr *SymExpr = MCSymbolRefExpr::create(
+          Sym, getVariantKind(MO.getTargetFlags()), Ctx);
       const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
           MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
       MCOp = MCOperand::createExpr(Expr);
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -134,9 +134,9 @@
 }
 
 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
-  if (!RM.hasValue())
-    return Reloc::PIC_;
-  return *RM;
+  // The AMDGPU toolchain only supports generating shared objects, so we
+  // must always use PIC.
+  return Reloc::PIC_;
 }
 
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -510,9 +510,11 @@
   const Value *Ptr = MemNode->getMemOperand()->getValue();
 
   // UndefValue means this is a load of a kernel input. These are uniform.
-  // Sometimes LDS instructions have constant pointers
-  if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
-      isa<GlobalValue>(Ptr))
+  // Sometimes LDS instructions have constant pointers.
+  // If Ptr is null, then that means this mem operand contains a
+  // PseudoSourceValue like GOT.
+  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
     return true;
 
   const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
@@ -1460,27 +1462,28 @@
   return DAG.getUNDEF(ASC->getValueType(0));
 }
 
+/// Returns true if \p GV must be addressed through the GOT: it lives in the
+/// global address space and cannot be assumed to be DSO-local.
+static bool shouldEmitGOTReloc(const GlobalValue *GV,
+                               const TargetMachine &TM) {
+  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+         !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
+}
+
 bool
 SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
-  if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
-    return false;
-
-  return TargetLowering::isOffsetFoldingLegal(GA);
+  // We can fold offsets for anything that doesn't require a GOT relocation.
+  return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+         !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
 }
 
-SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
-                                             SDValue Op,
-                                             SelectionDAG &DAG) const {
-  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
-
-  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-      GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
-    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
-
-  SDLoc DL(GSD);
-  const GlobalValue *GV = GSD->getGlobal();
-  EVT PtrVT = Op.getValueType();
-
+/// Builds a PC_ADD_REL_OFFSET node computing the pc-relative address of
+/// \p GV plus \p Offset. \p GAFlags is forwarded as the target flags of the
+/// global-address operand (MO_GOTPCREL is used to address the GOT entry).
+static SDValue
+buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
+                        const SDLoc &DL, int64_t Offset, EVT PtrVT,
+                        unsigned GAFlags = SIInstrInfo::MO_NONE) {
   // In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
   // lowered to the following code sequence:
   // s_getpc_b64 s[0:1]
@@ -1498,11 +1501,43 @@
   // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
   // small. This requires us to add 4 to the global variable offset in order to
   // compute the correct address.
-  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
-                                          GSD->getOffset() + 4);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
+                                          GAFlags);
   return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
 }
 
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+                                             SDValue Op,
+                                             SelectionDAG &DAG) const {
+  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+      GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+  SDLoc DL(GSD);
+  const GlobalValue *GV = GSD->getGlobal();
+  EVT PtrVT = Op.getValueType();
+
+  if (!shouldEmitGOTReloc(GV, getTargetMachine()))
+    return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
+
+  // Offsets are never folded into globals that need a GOT relocation (see
+  // isOffsetFoldingLegal), so address the GOT entry itself and load from it.
+  SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
+                                            SIInstrInfo::MO_GOTPCREL);
+
+  Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
+  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+  const DataLayout &DataLayout = DAG.getDataLayout();
+  unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+  // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
+  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+  return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr,
+                     PtrInfo, false, false, true, Align);
+}
+
 SDValue SITargetLowering::lowerTRAP(SDValue Op,
                                     SelectionDAG &DAG) const {
   const MachineFunction &MF = DAG.getMachineFunction();
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -91,6 +91,12 @@
                            unsigned OpIdx1) const override;
 
 public:
+
+  enum TargetOperandFlags {
+    MO_NONE = 0,
+    MO_GOTPCREL = 1 // Lowered to a VK_GOTPCREL symbol reference.
+  };
+
   explicit SIInstrInfo(const SISubtarget &);
 
   const SIRegisterInfo &getRegisterInfo() const {
Index: test/CodeGen/AMDGPU/global-variable-relocs.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/global-variable-relocs.ll
@@ -0,0 +1,203 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s
+
+@private = private addrspace(1) global [256 x i32] zeroinitializer
+@internal = internal addrspace(1) global [256 x i32] zeroinitializer
+@available_externally = available_externally addrspace(1) global [256 x i32] zeroinitializer
+@linkonce = linkonce addrspace(1) global [256 x i32] zeroinitializer
+@weak = weak addrspace(1) global [256 x i32] zeroinitializer
+@common = common addrspace(1) global [256 x i32] zeroinitializer
+@extern_weak = extern_weak addrspace(1) global [256 x i32]
+@linkonce_odr = linkonce_odr addrspace(1) global [256 x i32] zeroinitializer
+@weak_odr = weak_odr addrspace(1) global [256 x i32] zeroinitializer
+@external = external addrspace(1) global [256 x i32]
+@external_w_init = addrspace(1) global [256 x i32] zeroinitializer
+
+; CHECK-LABEL: private_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], private+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @private_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @private, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: internal_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @internal_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @internal, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: available_externally_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], available_externally@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @available_externally_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @available_externally, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: linkonce_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: common_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], common@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @common_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @common, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: extern_weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], extern_weak@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @extern_weak_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @extern_weak, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: linkonce_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce_odr@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_odr_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce_odr, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: weak_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak_odr@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_odr_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak_odr, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: external_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: external_w_init_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external_w_init@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_w_init_test(i32 addrspace(1)* %out) {
+  %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external_w_init, i32 0, i32 1
+  %val = load i32, i32 addrspace(1)* %ptr
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: .local private
+; CHECK: .local internal
+; CHECK: .weak linkonce
+; CHECK: .weak weak
+; CHECK: .weak linkonce_odr
+; CHECK: .weak weak_odr
+; CHECK-NOT: external{{$}}
+; CHECK: .globl external_w_init
Index: test/CodeGen/AMDGPU/global-zero-initializer.ll
===================================================================
--- test/CodeGen/AMDGPU/global-zero-initializer.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s
-
-; CHECK: {{^}}load_init_global_global:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], global+4
-; CHECK: s_addc_u32 s5, s[[PC_HI]], 0
-; CHECK: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[ADDR_LO]]:7], 0 offset:40
-; CHECK: global:
-; CHECK: .zero 1024
-@global = addrspace(1) global [256 x i32] zeroinitializer
-
-define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
-  %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @global, i32 0, i32 10
-  %ld = load i32, i32 addrspace(1)* %gep
-  store i32 %ld, i32 addrspace(1)* %out
-  ret void
-}
Index: test/CodeGen/AMDGPU/hsa-func.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-func.ll
+++ test/CodeGen/AMDGPU/hsa-func.ll
@@ -18,7 +18,7 @@
 
 ; ELF: SHT_NOTE
 ; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
 ; ELF: 0020: 03000000 414D4400 04000700 07000000
 ; ELF: 0030: 00000000 00000000 414D4400 414D4447
 
@@ -30,7 +30,7 @@
 ; ELF: Type: Function (0x2)
 ; ELF: }
 
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 
Index: test/CodeGen/AMDGPU/hsa-note-no-func.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa-note-no-func.ll
+++ test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA --check-prefix=HSA-VI %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck --check-prefix=HSA --check-prefix=HSA-FIJI %s
 
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 ; HSA-FIJI: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
Index: test/CodeGen/AMDGPU/hsa.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa.ll
+++ test/CodeGen/AMDGPU/hsa.ll
@@ -18,7 +18,7 @@
 
 ; ELF: SHT_NOTE
 ; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
 ; ELF: 0020: 03000000 414D4400 04000700 07000000
 ; ELF: 0030: 00000000 00000000 414D4400 414D4447
 ; ELF: 0040: 50550000
@@ -29,7 +29,7 @@
 ; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
 ; ELF: }
 
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 