Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -164,7 +164,20 @@
 }
 
 bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-  OW->WriteZeros(Count);
+  // A count that is not a multiple of 4 means data is being written into
+  // the text section (unaligned instructions would be a far bigger problem),
+  // so the leftover Count % 4 bytes are simply emitted as zeros.
+  const uint64_t LeftoverBytes = Count % 4;
+  OW->WriteZeros(LeftoverBytes);
+
+  // FIXME: R600 support.
+  // Every remaining 4-byte slot receives the encoding of "s_nop 0".
+  const uint32_t SNop0Encoding = 0xbf800000;
+  uint64_t SlotsLeft = Count / 4;
+  while (SlotsLeft != 0) {
+    OW->write32(SNop0Encoding);
+    --SlotsLeft;
+  }
 
   return true;
 }
Index: test/CodeGen/AMDGPU/nop-data.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/nop-data.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - -mcpu=fiji | FileCheck %s
+
+; CHECK: kernel0:
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @kernel0() align 256 {
+entry:
+  ret void
+}
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_nop 0 // 0000000001FC: BF800000
+
+; CHECK-NEXT: {{^$}}
+; CHECK-NEXT: kernel1:
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(2)* %ptr.out) align 256 {
+entry:
+  ret void
+}