diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -1373,6 +1373,10 @@ Needs = StateExact | StateWQM | StateStrict; } + // Exact mode exit can occur in terminators, but must be before branches. + if (MI.isBranch() && OutNeeds == StateExact) + Needs = StateExact; + ++Next; } else { // End of basic block diff --git a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir --- a/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir +++ b/llvm/test/CodeGen/AMDGPU/wqm-terminators.mir @@ -36,9 +36,11 @@ ; CHECK-NEXT: S_CMP_EQ_U32 [[COPY1]], 0, implicit-def $scc ; CHECK-NEXT: undef %5.sub0:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: %5.sub1:vreg_64 = V_MUL_F32_e64 0, [[COPY2]].sub0, 0, [[COPY2]].sub1, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc + ; CHECK-NEXT: $exec_lo = S_AND_B32 $exec_lo, [[COPY]], implicit-def $scc + ; CHECK-NEXT: $scc = COPY [[COPY3]] ; CHECK-NEXT: [[IMAGE_SAMPLE_V3_V2_gfx10_:%[0-9]+]]:vreg_96 = IMAGE_SAMPLE_V3_V2_gfx10 %5, [[DEF]], [[DEF1]], 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8) ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc - ; CHECK-NEXT: $exec_lo = S_AND_B32_term $exec_lo, [[COPY]], implicit-def $scc ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: