Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1869,15 +1869,26 @@
   }
 }
 
+bool static isConstGVWithInit(const GlobalValue *GV) {
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  return GVar &&
+         (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+          GVar->isConstant()) &&
+         GVar->hasDefinitiveInitializer() &&
+         GVar->hasUniqueInitializer();
+}
+
 bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
   const Triple &TT = getTargetMachine().getTargetTriple();
-  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+  return (isConstGVWithInit(GV) ||
+          GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
          AMDGPU::shouldEmitConstantsToTextSection(TT);
 }
 
 bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
   return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
           GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+         !isConstGVWithInit(GV) &&
          !shouldEmitFixup(GV) &&
          !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
 }
@@ -2116,7 +2127,9 @@
   SDValue PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
                                              GAFlags == SIInstrInfo::MO_NONE ?
                                              GAFlags : GAFlags + 1);
-  return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
+  SDValue Ret = DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, MVT::i64,
+                            PtrLo, PtrHi);
+  return DAG.getZExtOrTrunc(Ret, DL, PtrVT);
 }
 
 SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
@@ -2124,8 +2137,7 @@
                                              SelectionDAG &DAG) const {
   GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
 
-  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
-      GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+  if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
     return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
 
   SDLoc DL(GSD);
@@ -2137,20 +2149,23 @@
   else if (shouldEmitPCReloc(GV))
     return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
                                    SIInstrInfo::MO_REL32);
+  else if (shouldEmitGOTReloc(GV)) {
 
-  SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
-                                            SIInstrInfo::MO_GOTPCREL32);
+    SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
+                                              SIInstrInfo::MO_GOTPCREL32);
 
-  Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
-  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
-  const DataLayout &DataLayout = DAG.getDataLayout();
-  unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
-  // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
-  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+    Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
+    PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+    const DataLayout &DataLayout = DAG.getDataLayout();
+    unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+    // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
+    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
 
-  return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
-                     MachineMemOperand::MODereferenceable |
-                         MachineMemOperand::MOInvariant);
+    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
+                       MachineMemOperand::MODereferenceable |
+                           MachineMemOperand::MOInvariant);
+  }
+  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
 }
 
 SDValue SITargetLowering::lowerTRAP(SDValue Op,
Index: test/CodeGen/AMDGPU/global-constant.ll
===================================================================
--- test/CodeGen/AMDGPU/global-constant.ll
+++ test/CodeGen/AMDGPU/global-constant.ll
@@ -4,6 +4,8 @@
 @private1 = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
 @private2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
 @available_externally = available_externally addrspace(2) global [256 x i32] zeroinitializer
+@internal_priv_as = internal unnamed_addr constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
+@internal_global_as = internal unnamed_addr addrspace(1) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
 
 ; GCN-LABEL: {{^}}private_test:
 ; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
@@ -26,13 +28,39 @@
 ; HSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4
 ; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4
 
+; GCN: s_getpc_b64 s{{\[}}[[PC2_LO:[0-9]+]]:[[PC2_HI:[0-9]+]]{{\]}}
+
+; Non-HSA OSes use fixup into .text section.
+; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC2_LO]], internal_priv_as
+; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC2_HI]], 0
+
+; HSA OSes use relocations.
+; HSA: s_add_u32 s{{[0-9]+}}, s[[PC2_LO]], internal_priv_as@rel32@lo+4
+; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC2_HI]], internal_priv_as@rel32@hi+4
+
+; GCN: s_getpc_b64 s{{\[}}[[PC3_LO:[0-9]+]]:[[PC3_HI:[0-9]+]]{{\]}}
+
+; Non-HSA OSes use fixup into .text section.
+; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC3_LO]], internal_global_as
+; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC3_HI]], 0
+
+; HSA OSes use relocations.
+; HSA: s_add_u32 s{{[0-9]+}}, s[[PC3_LO]], internal_global_as@rel32@lo+4
+; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC3_HI]], internal_global_as@rel32@hi+4
+
 define void @private_test(i32 %index, float addrspace(1)* %out) {
   %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
   %val = load float, float addrspace(2)* %ptr
-  store float %val, float addrspace(1)* %out
+  store volatile float %val, float addrspace(1)* %out
   %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
   %val2 = load float, float addrspace(2)* %ptr2
-  store float %val2, float addrspace(1)* %out
+  store volatile float %val2, float addrspace(1)* %out
+  %ptr3 = getelementptr [4 x float], [4 x float]* @internal_priv_as, i32 0, i32 %index
+  %val3 = load float, float* %ptr3
+  store volatile float %val3, float addrspace(1)* %out
+  %ptr4 = getelementptr [4 x float], [4 x float] addrspace(1)* @internal_global_as, i32 0, i32 %index
+  %val4 = load float, float addrspace(1)* %ptr4
+  store volatile float %val4, float addrspace(1)* %out
   ret void
 }