Treat calls like inline asm in the check patched below: the guarded
`return true` (commented "conservative assumption") now fires for
MI.isCall() as well as MI.isInlineAsm().

The new test test/CodeGen/AMDGPU/call-skip.ll checks that, after
s_and_saveexec_b64, the mask-branch comment is immediately followed by an
s_cbranch_execz to the end block, and that this branch precedes the
s_swappc_b64 (for a call, in both a plain function and an amdgpu_kernel)
or the inline asm body.

NOTE(review): this patch text had lost its line breaks; the layout below is
reconstructed from the hunk headers (7/7 lines and 0/67 lines). The
indentation of the context lines in the SIInstrInfo.cpp hunk is assumed -
confirm against the target file before applying.

Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2486,7 +2486,7 @@
       Opcode == AMDGPU::DS_ORDERED_COUNT)
     return true;
 
-  if (MI.isInlineAsm())
+  if (MI.isCall() || MI.isInlineAsm())
     return true; // conservative assumption
 
   // These are like SALU instructions in terms of effects, so it's questionable
Index: test/CodeGen/AMDGPU/call-skip.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/call-skip.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; A call should be skipped if all lanes are zero, since we don't know
+; what side effects should be avoided inside the call.
+define hidden void @func() #1 {
+  ret void
+}
+
+; GCN-LABEL: {{^}}if_call:
+; GCN: s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
+; GCN: s_swappc_b64
+; GCN: [[END]]:
+define void @if_call(i32 %flag) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %call, label %end
+
+call:
+  call void @func()
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}if_asm:
+; GCN: s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
+; GCN: ; sample asm
+; GCN: [[END]]:
+define void @if_asm(i32 %flag) #0 {
+  %cc = icmp eq i32 %flag, 0
+  br i1 %cc, label %call, label %end
+
+call:
+  call void asm sideeffect "; sample asm", ""()
+  br label %end
+
+end:
+  ret void
+}
+
+; GCN-LABEL: {{^}}if_call_kernel:
+; GCN: s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
+; GCN: s_swappc_b64
+define amdgpu_kernel void @if_call_kernel() #0 {
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %cc = icmp eq i32 %id, 0
+  br i1 %cc, label %call, label %end
+
+call:
+  call void @func()
+  br label %end
+
+end:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind readnone speculatable }