diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -133,6 +133,7 @@
 
 class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
   MCStreamer &Streamer;
+  Triple::OSType Os;
 
   void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
                 function_ref EmitDesc);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -393,9 +393,9 @@
 // AMDGPUTargetELFStreamer
 //===----------------------------------------------------------------------===//
 
-AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
-    MCStreamer &S, const MCSubtargetInfo &STI)
-    : AMDGPUTargetStreamer(S), Streamer(S) {
+AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
+                                                 const MCSubtargetInfo &STI)
+    : AMDGPUTargetStreamer(S), Streamer(S), Os(STI.getTargetTriple().getOS()) {
   MCAssembler &MCA = getStreamer().getAssembler();
   unsigned EFlags = MCA.getELFHeaderEFlags();
 
@@ -438,9 +438,15 @@
 
   auto NameSZ = Name.size() + 1;
 
+  unsigned NoteFlags = 0;
+  // TODO: Only AMDHSA keeps SHF_ALLOC on the note section; apparently OpenCL
+  // currently needs it, see https://reviews.llvm.org/D74995
+  if (Os == Triple::AMDHSA)
+    NoteFlags = ELF::SHF_ALLOC;
+
   S.PushSection();
-  S.SwitchSection(Context.getELFSection(
-    ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
+  S.SwitchSection(
+      Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
   S.emitInt32(NameSZ);                                        // namesz
   S.emitValue(DescSZ, 4);                                     // descz
   S.emitInt32(NoteType);                                      // type
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -filetype=obj -mattr=-code-object-v3 | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri -mattr=-code-object-v3 | llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri -mattr=-code-object-v3 | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64,-code-object-v3 | FileCheck --check-prefix=GFX10-W32 %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 | FileCheck --check-prefix=GFX10-W64 %s
+
+; ELF: Section {
+; ELF: Name: .text
+; ELF: Type: SHT_PROGBITS (0x1)
+; ELF: Flags [ (0x6)
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_EXECINSTR (0x4)
+; ELF: }
+
+; ELF: SHT_NOTE
+; ELF: Flags [ (0x0)
+; ELF: ]
+
+; ELF: Symbol {
+; ELF: Name: simple
+; ELF: Size: 36
+; ELF: Section: .text (0x2)
+; ELF: }
+
+; GFX10-W32: NumSGPRsForWavesPerEU: 4
+; GFX10-W32: NumVGPRsForWavesPerEU: 3
+; GFX10-W64: NumSGPRsForWavesPerEU: 2
+; GFX10-W64: NumVGPRsForWavesPerEU: 3
+
+define amdgpu_kernel void @simple(i32 addrspace(1)* %out) {
+entry:
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}