Index: lib/Target/AMDGPU/SIInsertSkips.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertSkips.cpp
+++ lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -145,6 +145,11 @@
           return true;
       }
 
+      // Always branch over an export. An export executed with no lanes enabled
+      // is not the same as not exporting at all, at least in some cases.
+      if (I->getOpcode() == AMDGPU::EXP || I->getOpcode() == AMDGPU::EXP_DONE)
+        return true;
+
       ++NumInstr;
       if (NumInstr >= SkipThreshold)
Index: test/CodeGen/AMDGPU/branch-over-exp.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/branch-over-exp.mir
@@ -0,0 +1,28 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck --enable-var-scope %s
+#
+# Check that si-insert-skips inserts an S_CBRANCH_EXECZ to branch over the export in bb.1.
+#
+# CHECK: S_CBRANCH_EXECZ
+
+---
+name: _func
+body: |
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    V_CMP_LT_U32_e32 killed $vgpr0, killed $vgpr1, implicit-def $vcc, implicit $exec
+    $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+    SI_MASK_BRANCH %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    liveins: $vgpr2
+
+    EXP_DONE 32, killed renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 1, implicit $exec
+
+  bb.2:
+    S_ENDPGM
+
+...