diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -35,6 +35,8 @@ StringRef SectionName = GO->getSection(); if (SectionName.startswith(".AMDGPU.comment.")) SK = SectionKind::getMetadata(); + else if (SectionName.startswith(".text")) + SK = SectionKind::getText(); return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4648,7 +4648,8 @@ const Triple &TT = getTargetMachine().getTargetTriple(); return (GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) && - AMDGPU::shouldEmitConstantsToTextSection(TT); + (AMDGPU::shouldEmitConstantsToTextSection(TT) || + GV->getSection().startswith(".text")); } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -568,7 +568,7 @@ } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600; + return TT.getArch() == Triple::r600; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=HSA %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=PAL %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -stop-after=legalizer < %s | FileCheck -check-prefix=HSA %s @external_constant = external addrspace(4) constant i32, align 4 @external_constant32 = external addrspace(6) constant i32, align 4 @@ -8,6 +8,7 @@ @external_other = external addrspace(999) global i32, align 4 @internal_constant = internal addrspace(4) constant i32 9, align 4 +@internal_constant_text = internal addrspace(4) constant i32 9, align 4, section ".text" @internal_constant32 = internal addrspace(6) constant i32 9, align 4 @internal_global = internal addrspace(1) global i32 9, align 4 @internal_other = internal addrspace(999) global i32 9, align 4 @@ -25,16 +26,6 @@ ; HSA: $vgpr1 = COPY [[UV1]](s32) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: external_constant_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant + 4, 0, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @external_constant } @@ -50,17 +41,6 @@ ; HSA: $vgpr1 = COPY [[UV1]](s32) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: external_global_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 4, implicit-def $scc - ; PAL: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @external_global } @@ -76,17 +56,6 @@ ; HSA: $vgpr1 = COPY [[UV1]](s32) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: external_other_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 4, implicit-def $scc - ; PAL: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load 8 from got, addrspace 4) - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @external_other } @@ -101,19 +70,23 @@ ; HSA: $vgpr1 = COPY [[UV1]](s32) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: internal_constant_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant + 4, 0, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @internal_constant } +define i32 addrspace(4)* @internal_constant_text_pcrel() { + ; HSA-LABEL: name: internal_constant_text_pcrel + ; HSA: bb.1 (%ir-block.0): + ; HSA: liveins: $sgpr30_sgpr31 + ; HSA: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; HSA: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant_text + 4, 0, implicit-def $scc + ; HSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) + ; HSA: $vgpr0 = COPY [[UV]](s32) + ; HSA: $vgpr1 = COPY [[UV1]](s32) + ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ret i32 addrspace(4)* @internal_constant_text +} + define i32 addrspace(1)* @internal_global_pcrel() { ; HSA-LABEL: name: internal_global_pcrel ; HSA: bb.1 (%ir-block.0): @@ -125,16 +98,6 @@ ; HSA: $vgpr1 = COPY [[UV1]](s32) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: internal_global_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 4, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @internal_global } @@ -149,16 +112,6 @@ ; HSA: $vgpr1 = COPY [[UV1]](s32) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 - ; PAL-LABEL: name: internal_other_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 4, implicit-def $scc - ; PAL: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) - ; PAL: $vgpr0 = COPY [[UV]](s32) - ; PAL: $vgpr1 = COPY [[UV1]](s32) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @internal_other } @@ -173,15 +126,6 @@ ; HSA: $vgpr0 = COPY [[EXTRACT]](p6) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 - ; PAL-LABEL: name: external_constant32_got - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @external_constant32 + 4, 0, implicit-def $scc - ; PAL: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; PAL: $vgpr0 = COPY [[EXTRACT]](p6) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 ret i32 addrspace(6)* @external_constant32 } @@ -195,14 +139,5 @@ ; HSA: $vgpr0 = COPY [[EXTRACT]](p6) ; HSA: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; HSA: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 - ; PAL-LABEL: name: internal_constant32_pcrel - ; PAL: bb.1 (%ir-block.0): - ; PAL: liveins: $sgpr30_sgpr31 - ; PAL: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; PAL: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET @internal_constant32 + 4, 0, implicit-def $scc - ; PAL: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 - ; PAL: $vgpr0 = COPY [[EXTRACT]](p6) - ; PAL: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; PAL: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 ret i32 addrspace(6)* @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/elf.rodata.ll b/llvm/test/CodeGen/AMDGPU/elf.rodata.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/elf.rodata.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=amdgcn -mcpu=fiji -filetype=obj | llvm-readobj --symbols -S --sd - | FileCheck %s + +; CHECK: Section { +; CHECK: Name: .text +; CHECK: Type: SHT_PROGBITS (0x1) +; CHECK: Flags [ (0x6) +; CHECK: Size: 16 +; CHECK: SectionData ( +; CHECK: 0000: 414D4431 414D4431 414D4431 414D4431 |AMD1AMD1AMD1AMD1| +; CHECK: ) +; CHECK: } + +; CHECK: Section { +; CHECK: Name: .rodata +; CHECK: Type: SHT_PROGBITS (0x1) +; CHECK: Flags [ (0x2) +; CHECK: Size: 16 +; CHECK: SectionData ( +; CHECK: 0000: 414D4432 414D4432 414D4432 414D4432 |AMD2AMD2AMD2AMD2| +; CHECK: ) +; CHECK: } + + +@rodata_info_var_1 = internal global [4 x i32][i32 826559809, i32 826559809, i32 826559809, i32 826559809], section ".text" +@rodata_info_var_2 = constant [4 x i32][i32 843337025, i32 843337025, i32 843337025, i32 843337025] diff --git a/llvm/test/CodeGen/AMDGPU/global-constant.ll b/llvm/test/CodeGen/AMDGPU/global-constant.ll --- a/llvm/test/CodeGen/AMDGPU/global-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/global-constant.ll @@ -5,29 +5,22 @@ ; RUN: llc -mtriple=r600-- -mcpu=cypress < %s | FileCheck -check-prefix=R600 %s @private1 = private unnamed_addr addrspace(4) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] -@private2 = private unnamed_addr addrspace(4) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] +@private2 = private unnamed_addr addrspace(4) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0], section ".textual" @available_externally = available_externally addrspace(4) global [256 x i32] zeroinitializer ; GCN-LABEL: {{^}}private_test: ; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} -; PAL OSes use fixup into .text section. -; PAL: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1 -; PAL: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 - -; Non-PAL OSes use relocations. -; NOPAL: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 -; NOPAL: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+4 +; All OSes use relocations for private1. +; GCN: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+4 ; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} -; PAL OSes use fixup into .text section. -; PAL: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2 -; PAL: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 - -; Non-PAL OSes use relocations. -; NOPAL: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4 -; NOPAL: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4 +; All OSes use fixup into .text section for private2, because of its explicit +; section name starting with ".text". +; GCN: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2 +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 ; R600-LABEL: private_test define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {