Index: lib/Target/AMDGPU/SIInsertSkips.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertSkips.cpp
+++ lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -143,6 +143,18 @@
       return true;
     }
 
+    // These instructions cause shader I/O that may cause hardware lockups
+    // when executed with an empty EXEC mask.
+    //
+    // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+    //       EXEC = 0, but checking for that case here seems not worth it
+    //       given the typical code patterns.
+    if ((I->getOpcode() == AMDGPU::S_SENDMSG) ||
+        (I->getOpcode() == AMDGPU::S_SENDMSGHALT) ||
+        (I->getOpcode() == AMDGPU::EXP) ||
+        (I->getOpcode() == AMDGPU::EXP_DONE))
+      return true;
+
     if (I->isInlineAsm()) {
       const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
       const char *AsmStr = I->getOperand(0).getSymbolName();
Index: test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -480,5 +480,65 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}test_if_export_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_vm_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_done_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
+; GCN: s_cbranch_execz
+; GCN: exp
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %end, label %exp
+
+exp:
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
+  br label %end
+
+end:
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind inaccessiblememonly }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -136,6 +136,21 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}if_sendmsg:
+; GCN: s_cbranch_execz
+; GCN: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
+define amdgpu_gs void @if_sendmsg(i32 %flag) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %sendmsg, label %end
+
+sendmsg:
+  call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
+  br label %end
+
+end:
+  ret void
+}
+
 declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
 declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0