diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5278,7 +5278,7 @@ .addReg(Exec) .addReg(SaveExec); - BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB); + BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); } // Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -318,6 +318,14 @@ let hasSideEffects = 1; } +def SI_WATERFALL_LOOP : CFPseudoInstSI < + (outs), + (ins brtarget:$target), [], 1> { + let Size = 8; + let isBranch = 1; + let Defs = []; +} + def SI_LOOP : CFPseudoInstSI < (outs), (ins SReg_1:$saved, brtarget:$target), [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> { diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -600,6 +600,10 @@ emitLoop(MI); break; + case AMDGPU::SI_WATERFALL_LOOP: + MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ)); + break; + case AMDGPU::SI_END_CF: SplitBB = emitEndCf(MI); break; @@ -840,6 +844,7 @@ case AMDGPU::SI_IF: case AMDGPU::SI_ELSE: case AMDGPU::SI_IF_BREAK: + case AMDGPU::SI_WATERFALL_LOOP: case AMDGPU::SI_LOOP: case AMDGPU::SI_END_CF: SplitMBB = process(MI); diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -30,7 +30,7 @@ # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-LABEL: bb.2: # W64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -55,7 +55,7 @@ # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]] --- @@ -103,7 +103,7 @@ # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-LABEL: bb.2: # W64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -128,7 +128,7 @@ # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]] --- @@ -176,7 +176,7 @@ # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-LABEL: bb.2: # W64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -201,7 +201,7 @@ # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]] --- @@ -286,7 +286,7 @@ # W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-NO-ADDR64-LABEL: bb.2: # W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -309,7 +309,7 @@ # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]