Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -337,7 +337,13 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { CallGraphResourceInfo.clear(); - if (AMDGPU::isGFX10(*getGlobalSTI())) { + // Pad with s_code_end to help tools and guard against instruction prefetch + // causing stale data in caches. Arguably this should be done by the linker, + // which is why this isn't done for Mesa. + const MCSubtargetInfo &STI = *getGlobalSTI(); + if (AMDGPU::isGFX10(STI) && + (STI.getTargetTriple().getOS() == Triple::AMDHSA || + STI.getTargetTriple().getOS() == Triple::AMDPAL)) { OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); getTargetStreamer()->EmitCodeEnd(); } Index: test/CodeGen/AMDGPU/s_code_end.ll =================================================================== --- test/CodeGen/AMDGPU/s_code_end.ll +++ test/CodeGen/AMDGPU/s_code_end.ll @@ -1,11 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10,GFX10-ASM %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10,GFX10-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s ; GCN: a_kernel1: -; GCN-NEXT: s_endpgm -; GCN-ASM-NEXT: [[END_LABEL1:\.Lfunc_end.*]]: +; GCN: s_endpgm +; GCN-ASM: [[END_LABEL1:\.Lfunc_end.*]]: ; GCN-ASM-NEXT: .size a_kernel1, [[END_LABEL1]]-a_kernel1 -; GCN-ASM: .section .AMDGPU.config ; GCN-OBJ-NEXT: s_nop 0 @@ -14,19 +16,17 @@ } ; GCN: a_kernel2: -; GCN-NEXT: s_endpgm -; GCN-ASM-NEXT: [[END_LABEL2:\.Lfunc_end.*]]: +; GCN: s_endpgm +; GCN-ASM: [[END_LABEL2:\.Lfunc_end.*]]: ; GCN-ASM-NEXT: .size a_kernel2, [[END_LABEL2]]-a_kernel2 -; GCN-ASM: .section .AMDGPU.config -; GCN-OBJ-NEXT: {{^$}} +; GCN-OBJ: {{^$}} define amdgpu_kernel void @a_kernel2() { ret void } -; GCN-ASM: .text -; GCN-ASM-NEXT: .globl a_function +; GCN-ASM: .globl a_function ; GCN-ASM-NEXT: .p2align 2 ; GCN-ASM-NEXT: .type a_function,@function @@ -34,46 +34,48 @@ ; GCN: s_setpc_b64 ; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]: ; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function -; GFX10-ASM: .p2alignl 6, 3214868480 -; GFX10-ASM-NEXT: .fill 32, 4, 3214868480 +; GFX10END-ASM: .p2alignl 6, 3214868480 +; GFX10END-ASM-NEXT: .fill 32, 4, 3214868480 +; GFX10NOEND-NOT: .fill -; GFX10-OBJ-NEXT: s_code_end +; GFX10NOEND-OBJ-NOT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end -; GFX10-OBJ: s_code_end // 000000000140: -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end +; GFX10END-OBJ: s_code_end // 000000000140: +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end -; GFX10-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end +; GFX10END-OBJ-NEXT: s_code_end define void @a_function() { ret void