Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -667,6 +667,11 @@ LegalizeRuleSet &unsupportedIf(LegalityPredicate Predicate) { return actionIf(LegalizeAction::Unsupported, Predicate); } + + LegalizeRuleSet &unsupportedFor(std::initializer_list<LLT> Types) { + return actionFor(LegalizeAction::Unsupported, Types); + } + LegalizeRuleSet &unsupportedIfMemSizeNotPow2() { return actionIf(LegalizeAction::Unsupported, LegalityPredicates::memSizeInBytesNotPow2(0)); Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -372,7 +372,8 @@ setAction({G_FRAME_INDEX, PrivatePtr}, Legal); getActionDefinitionsBuilder(G_GLOBAL_VALUE) - .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr}); + .unsupportedFor({PrivatePtr}) + .custom(); setAction({G_BLOCK_ADDR, CodePtr}, Legal); auto &FPOpActions = getActionDefinitionsBuilder( Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -263,6 +263,12 @@ bool isMemOpUniform(const SDNode *N) const; bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const; + static bool isNonGlobalAddrSpace(unsigned AS) { + return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS || + AS == AMDGPUAS::PRIVATE_ADDRESS; + } + + // FIXME: Missing constant_32bit static bool isFlatGlobalAddrSpace(unsigned AS) { return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS || Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- 
llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4384,9 +4384,7 @@ // FIXME: Either avoid relying on address space here or change the default // address space for functions to avoid the explicit check. return (GV->getValueType()->isFunctionTy() || - GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || - GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) && + !isNonGlobalAddrSpace(GV->getAddressSpace())) && !shouldEmitFixup(GV) && !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll @@ -0,0 +1,15 @@ +; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s + +; FIXME: Should produce context error for each one +; ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p5) = G_GLOBAL_VALUE @external_private (in function: fn_external_private) + +@external_private = external addrspace(5) global i32, align 4 +@internal_private = internal addrspace(5) global i32 undef, align 4 + +define i32 addrspace(5)* @fn_external_private() { + ret i32 addrspace(5)* @external_private +} + +define i32 addrspace(5)* @fn_internal_private() { + ret i32 addrspace(5)* @internal_private +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -5,10 +5,12 @@ @external_constant = external addrspace(4) constant i32, align 4 @external_constant32 = external addrspace(6) constant i32, align 4 @external_global = external addrspace(1) global i32, align 4 +@external_other = external addrspace(999) global i32, 
align 4 @internal_constant = internal addrspace(4) constant i32 9, align 4 @internal_constant32 = internal addrspace(6) constant i32 9, align 4 @internal_global = internal addrspace(1) global i32 9, align 4 +@internal_other = internal addrspace(999) global i32 9, align 4 define i32 addrspace(4)* @external_constant_got() { @@ -62,6 +64,32 @@ ret i32 addrspace(1)* @external_global } +define i32 addrspace(999)* @external_other_got() { + ; HSA-LABEL: name: external_other_got + ; HSA: bb.1 (%ir-block.0): + ; HSA: liveins: $sgpr30_sgpr31 + ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 4, implicit-def $scc + ; HSA: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) + ; HSA: $vgpr0 = COPY [[UV]](s32) + ; HSA: $vgpr1 = COPY [[UV1]](s32) + ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; PAL-LABEL: name: external_other_got + ; PAL: bb.1 (%ir-block.0): + ; PAL: liveins: $sgpr30_sgpr31 + ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 4, implicit-def $scc + ; PAL: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) + ; PAL: $vgpr0 = COPY [[UV]](s32) + ; PAL: $vgpr1 = COPY [[UV1]](s32) + ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 
+ ret i32 addrspace(999)* @external_other +} + define i32 addrspace(4)* @internal_constant_pcrel() { ; HSA-LABEL: name: internal_constant_pcrel ; HSA: bb.1 (%ir-block.0): @@ -110,6 +138,30 @@ ret i32 addrspace(1)* @internal_global } +define i32 addrspace(999)* @internal_other_pcrel() { + ; HSA-LABEL: name: internal_other_pcrel + ; HSA: bb.1 (%ir-block.0): + ; HSA: liveins: $sgpr30_sgpr31 + ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 4, implicit-def $scc + ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) + ; HSA: $vgpr0 = COPY [[UV]](s32) + ; HSA: $vgpr1 = COPY [[UV1]](s32) + ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; PAL-LABEL: name: internal_other_pcrel + ; PAL: bb.1 (%ir-block.0): + ; PAL: liveins: $sgpr30_sgpr31 + ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 4, implicit-def $scc + ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) + ; PAL: $vgpr0 = COPY [[UV]](s32) + ; PAL: $vgpr1 = COPY [[UV1]](s32) + ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ret i32 addrspace(999)* @internal_other +} + define i32 addrspace(6)* @external_constant32_got() { ; HSA-LABEL: name: external_constant32_got ; HSA: bb.1 (%ir-block.0):