Index: llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -476,18 +476,27 @@ if (!TRI.isVGPR(MRI, X)) return nullptr; + // One implicit operand is EXEC, the rest we may want to preserve as the + // instruction will be killed if combined. + if (MovT.getNumImplicitOperands() > (MovT.isCopy() ? 0 : 1)) + return nullptr; + const unsigned SearchLimit = 16; unsigned Count = 0; + bool KilledT = false; for (auto Iter = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end(); - Iter != E && Count < SearchLimit; ++Iter, ++Count) { + Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) { MachineInstr *MovY = &*Iter; + KilledT = MovY->killsRegister(T, &TRI); + if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && MovY->getOpcode() != AMDGPU::COPY) || !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T || - MovY->getOperand(1).getSubReg() != Tsub) + MovY->getOperand(1).getSubReg() != Tsub || + MovY->getNumImplicitOperands() > (MovY->isCopy() ? 0 : 1)) continue; Register Y = MovY->getOperand(0).getReg(); @@ -521,24 +530,37 @@ MovX = nullptr; break; } + // Implicit use of M0 is an indirect move. + if (I->hasRegisterImplicitUseOperand(AMDGPU::M0)) + continue; + + if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 
0 : 1))) + continue; + MovX = &*I; } if (!MovX) continue; - LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY); + LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY); for (unsigned I = 0; I < Size; ++I) { TargetInstrInfo::RegSubRegPair X1, Y1; X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI); Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI); - BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(), - TII->get(AMDGPU::V_SWAP_B32)) + MachineBasicBlock &MBB = *MovT.getParent(); + auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(), + TII->get(AMDGPU::V_SWAP_B32)) .addDef(X1.Reg, 0, X1.SubReg) .addDef(Y1.Reg, 0, Y1.SubReg) .addReg(Y1.Reg, 0, Y1.SubReg) .addReg(X1.Reg, 0, X1.SubReg).getInstr(); + if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { + // Drop implicit EXEC. + MIB->RemoveOperand(MIB->getNumExplicitOperands()); + MIB->copyImplicitOps(*MBB.getParent(), *MovX); + } } MovX->eraseFromParent(); MovY->eraseFromParent(); Index: llvm/test/CodeGen/AMDGPU/v_swap_b32.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/v_swap_b32.mir +++ llvm/test/CodeGen/AMDGPU/v_swap_b32.mir @@ -517,7 +517,7 @@ ... # GCN-LABEL: name: swap_virt_copy_subreg_impdef_super -# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec +# GCN: %2.sub0:vreg_64 = COPY %0.sub0, implicit-def %2, implicit $exec --- name: swap_virt_copy_subreg_impdef_super registers: @@ -672,3 +672,192 @@ %1 = COPY %2 S_ENDPGM 0 ... 
+ +# GCN-LABEL: name: swap_liveness_error_mov +# GCN: $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7 +# GCN-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7 +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec + +--- +name: swap_liveness_error_mov +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr5, $vgpr1_vgpr2 + + $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2 + $vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec + $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7 + $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7 + $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec + S_ENDPGM 0 +... + +# GCN-LABEL: name: swap_liveness_error_copy +# GCN: $vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2 +# GCN-NEXT: $vgpr1 = COPY killed $vgpr5 +# GCN-NEXT: $vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7 +# GCN-NEXT: $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7 +# GCN-NEXT: $vgpr5 = COPY $vgpr6 + +--- +name: swap_liveness_error_copy +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr5, $vgpr1_vgpr2 + + $vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2 + $vgpr1 = COPY killed $vgpr5 + $vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7 + $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7 + $vgpr5 = COPY $vgpr6 + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: swap_killed_t_early +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2 +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + +--- +name: swap_killed_t_early +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2 + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec + $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: swap_killed_t_late +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + +--- +name: swap_killed_t_late +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2 + $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +# GCN-LABEL: name: swap_killed_x +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + +--- +name: swap_killed_x +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0 + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: indirect_mov_t +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0 +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + +--- +name: indirect_mov_t +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0 + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +# GCN-LABEL: name: indirect_mov_x +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0 +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec +# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + +--- +name: indirect_mov_x +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0 + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: indirect_mov_y +# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec +# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec +# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0 +# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + +--- +name: indirect_mov_y +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +# GCN-LABEL: name: implicit_ops_mov_x_swap_b32 +# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2 + +--- +name: implicit_ops_mov_x_swap_b32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec + $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2 + $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +# GCN-LABEL: name: implicit_ops_mov_x_swap_b64 +# GCN: %2:vreg_64 = COPY %0 +# GCN-NEXT: %0:vreg_64 = COPY %1, implicit $vgpr0 +# GCN-NEXT: %1:vreg_64 = COPY %2 + +--- +name: implicit_ops_mov_x_swap_b64 +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: vreg_64 } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = COPY %0 + %0 = COPY %1, implicit $vgpr0 + %1 = COPY %2 +...