[AMDGPU] Add llvm.amdgcn.s.nop intrinsic

Adds an intrinsic taking a single immediate i16 operand. It is selected to a
new pseudo, S_NOP_se, which is marked hasSideEffects = 1; AMDGPUMCInstLower
rewrites S_NOP_se to S_NOP before the pseudoToMCOpcode lookup. The new test
runs llc for gfx900 both with and without -global-isel and checks the emitted
s_nop immediates.

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1677,6 +1677,11 @@
                                 IntrHasSideEffects]> {
 }
 
+def int_amdgcn_s_nop :
+  DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
+                                IntrHasSideEffects]> {
+}
+
 def int_amdgcn_s_incperflevel :
   ClangBuiltin<"__builtin_amdgcn_s_incperflevel">,
   DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -138,6 +138,8 @@
              Opcode == AMDGPU::SI_TCRETURN_GFX) {
     // TODO: How to use branch immediate and avoid register+add?
     Opcode = AMDGPU::S_SETPC_B64;
+  } else if (Opcode == AMDGPU::S_NOP_se) {
+    Opcode = AMDGPU::S_NOP;
   }
 
   int MCOpcode = TII->pseudoToMCOpcode(Opcode);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1163,6 +1163,12 @@
 
 def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16">;
 
+// Define variant marked as having side effects for use with intrinsic
+def S_NOP_se : SOPP_Pseudo<"" , (ins i16imm:$simm16), "$simm16",
+  [(int_amdgcn_s_nop timm:$simm16)]> {
+  let hasSideEffects = 1;
+}
+
 let isTerminator = 1 in {
 def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins Endpgm:$simm16), "$simm16", [], ""> {
   let isBarrier = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_s_nop() {
+; GCN-LABEL: test_s_nop:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_nop 0
+; GCN-NEXT:    s_nop 1
+; GCN-NEXT:    s_nop 2
+; GCN-NEXT:    s_nop 3
+; GCN-NEXT:    s_nop 4
+; GCN-NEXT:    s_nop 5
+; GCN-NEXT:    s_nop 6
+; GCN-NEXT:    s_nop 7
+; GCN-NEXT:    s_nop 63
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.nop(i16 0)
+  call void @llvm.amdgcn.s.nop(i16 1)
+  call void @llvm.amdgcn.s.nop(i16 2)
+  call void @llvm.amdgcn.s.nop(i16 3)
+  call void @llvm.amdgcn.s.nop(i16 4)
+  call void @llvm.amdgcn.s.nop(i16 5)
+  call void @llvm.amdgcn.s.nop(i16 6)
+  call void @llvm.amdgcn.s.nop(i16 7)
+  call void @llvm.amdgcn.s.nop(i16 63)
+  ret void
+}
+
+declare void @llvm.amdgcn.s.nop(i16)