diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -578,7 +578,7 @@ } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600; + return TT.getArch() == Triple::r600; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=HSA %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=PAL %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=GCN %s @external_constant = external addrspace(4) constant i32, align 4 @external_constant32 = external addrspace(6) constant i32, align 4 @@ -14,195 +14,115 @@ define i32 addrspace(4)* @external_constant_got() { - ; HSA-LABEL: name: external_constant_got - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 4, implicit-def $scc - ; HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 
from got, addrspace 4) - ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) - ; HSA: $vgpr0 = COPY [[UV]](s32) - ; HSA: $vgpr1 = COPY [[UV1]](s32) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: external_constant_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant + 4, 0, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN-LABEL: name: external_constant_got + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 4, implicit-def $scc + ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) + ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: $vgpr1 = COPY [[UV1]](s32) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @external_constant } define i32 addrspace(1)* @external_global_got() { - ; HSA-LABEL: name: external_global_got - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = 
SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc - ; HSA: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; HSA: $vgpr0 = COPY [[UV]](s32) - ; HSA: $vgpr1 = COPY [[UV1]](s32) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: external_global_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc - ; PAL: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN-LABEL: name: external_global_got + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc + ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) + ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: $vgpr1 = COPY 
[[UV1]](s32) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @external_global } define i32 addrspace(999)* @external_other_got() { - ; HSA-LABEL: name: external_other_got - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 4, implicit-def $scc - ; HSA: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) - ; HSA: $vgpr0 = COPY [[UV]](s32) - ; HSA: $vgpr1 = COPY [[UV1]](s32) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: external_other_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 4, implicit-def $scc - ; PAL: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN-LABEL: name: external_other_got + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: 
[[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 4, implicit-def $scc + ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) + ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: $vgpr1 = COPY [[UV1]](s32) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @external_other } define i32 addrspace(4)* @internal_constant_pcrel() { - ; HSA-LABEL: name: internal_constant_pcrel - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 4, implicit-def $scc - ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; HSA: $vgpr0 = COPY [[UV]](s32) - ; HSA: $vgpr1 = COPY [[UV1]](s32) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: internal_constant_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant + 4, 0, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN-LABEL: name: 
internal_constant_pcrel + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 4, implicit-def $scc + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) + ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: $vgpr1 = COPY [[UV1]](s32) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @internal_constant } define i32 addrspace(1)* @internal_global_pcrel() { - ; HSA-LABEL: name: internal_global_pcrel - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc - ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) - ; HSA: $vgpr0 = COPY [[UV]](s32) - ; HSA: $vgpr1 = COPY [[UV1]](s32) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: internal_global_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: 
S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN-LABEL: name: internal_global_pcrel + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) + ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: $vgpr1 = COPY [[UV1]](s32) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @internal_global } define i32 addrspace(999)* @internal_other_pcrel() { - ; HSA-LABEL: name: internal_other_pcrel - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 4, implicit-def $scc - ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) - ; HSA: $vgpr0 = COPY [[UV]](s32) - ; HSA: $vgpr1 = COPY [[UV1]](s32) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: internal_other_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 4, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = 
COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN-LABEL: name: internal_other_pcrel + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 4, implicit-def $scc + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) + ; GCN: $vgpr0 = COPY [[UV]](s32) + ; GCN: $vgpr1 = COPY [[UV1]](s32) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @internal_other } define i32 addrspace(6)* @external_constant32_got() { - ; HSA-LABEL: name: external_constant32_got - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 4, implicit-def $scc - ; HSA: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; HSA: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 - ; HSA: $vgpr0 = COPY [[EXTRACT]](p6) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 - ; PAL-LABEL: name: external_constant32_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant32 + 4, 0, implicit-def $scc - ; PAL: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; PAL: 
$vgpr0 = COPY [[EXTRACT]](p6) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; GCN-LABEL: name: external_constant32_got + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 4, implicit-def $scc + ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) + ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 + ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 ret i32 addrspace(6)* @external_constant32 } define i32 addrspace(6)* @internal_constant32_pcrel() { - ; HSA-LABEL: name: internal_constant32_pcrel - ; HSA: bb.1 (%ir-block.0): - ; HSA: liveins: $sgpr30_sgpr31 - ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 4, implicit-def $scc - ; HSA: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; HSA: $vgpr0 = COPY [[EXTRACT]](p6) - ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 - ; PAL-LABEL: name: internal_constant32_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant32 + 4, 0, implicit-def $scc - ; PAL: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; PAL: $vgpr0 = COPY [[EXTRACT]](p6) - ; PAL: 
[[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; GCN-LABEL: name: internal_constant32_pcrel + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 4, implicit-def $scc + ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 + ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 ret i32 addrspace(6)* @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/global-constant.ll b/llvm/test/CodeGen/AMDGPU/global-constant.ll --- a/llvm/test/CodeGen/AMDGPU/global-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/global-constant.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=PAL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOPAL %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOPAL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOPAL %s +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=R600 %s @private1 = private unnamed_addr 
addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] @@ -11,23 +11,12 @@ ; GCN-LABEL: {{^}}private_test: ; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} -; PAL OSes use fixup into .text section. -; PAL: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1 -; PAL: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 - -; Non-PAL OSes use relocations. -; NOPAL: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 -; NOPAL: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+4 - +; Non-R600 targets use relocations. +; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+4 ; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} - -; PAL OSes use fixup into .text section. -; PAL: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2 -; PAL: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 - -; Non-PAL OSes use relocations. -; NOPAL: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4 -; NOPAL: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4 +; GCN: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4 ; R600-LABEL: private_test define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) { @@ -40,10 +29,10 @@ ret void } -; NOPAL-LABEL: {{^}}available_externally_test: -; NOPAL: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} -; NOPAL: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4 -; NOPAL: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4 +; GCN-LABEL: {{^}}available_externally_test: +; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4 ; R600-LABEL: available_externally_test define amdgpu_kernel void @available_externally_test(i32 addrspace(1)* 
%out) { %ptr = getelementptr [256 x i32], [256 x i32] addrspace(4)* @available_externally, i32 0, i32 1 @@ -52,8 +41,7 @@ ret void } -; PAL: .text -; NOPAL: .section .rodata +; GCN: .section .rodata ; R600: .text ; GCN: private1: