Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -431,6 +431,117 @@
   return TargetInstrInfo::RegSubRegPair(Reg, Sub);
 }
 
+// Match:
+//   bb0:
+//     ..
+//     mov r, imm
+//     ..
+//     branch bb1
+//
+//   bb1:
+//     ; predecessors: %bb.0
+//     ..
+//     mov r, imm  <== redundant mov
+//
+// Returns true if \p Mov (an immediate V_MOV_B32_e32, S_MOV_B32 or S_MOV_B64
+// whose block has exactly one predecessor) can be removed: the predecessor
+// already moved the same immediate into the same register and nothing on the
+// path between the two MOVs invalidates that value.
+//
+// A VALU MOV only writes the lanes enabled in EXEC, so for V_MOV_B32_e32 a
+// write to EXEC between the two MOVs blocks the transformation as well.
+//
+// This is not a pure query. On success the register is added to the live-ins
+// of the block containing \p Mov (when liveness is tracked) and kill/dead
+// flags contradicting the extended live range are cleared. Erasing \p Mov is
+// left to the caller.
+//
+static bool isMovRedundant(MachineInstr &Mov, MachineRegisterInfo &MRI,
+                           const SIInstrInfo *TII) {
+  const unsigned Op = Mov.getOpcode();
+  const bool IsVALU = Op == AMDGPU::V_MOV_B32_e32;
+  assert(IsVALU || Op == AMDGPU::S_MOV_B32 || Op == AMDGPU::S_MOV_B64);
+
+  MachineBasicBlock *MBB = Mov.getParent();
+  assert(MBB->pred_size() == 1);
+
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+  const MachineOperand *DstOperand =
+      IsVALU ? TII->getNamedOperand(Mov, AMDGPU::OpName::vdst)
+             : TII->getNamedOperand(Mov, AMDGPU::OpName::sdst);
+  const unsigned R = DstOperand->getReg();
+  const unsigned Rsub = DstOperand->getSubReg();
+
+  const MachineOperand *SrcOperand =
+      TII->getNamedOperand(Mov, AMDGPU::OpName::src0);
+  assert(SrcOperand->isImm());
+  const int64_t Imm = SrcOperand->getImm();
+
+  // True if MI invalidates the value the predecessor's MOV left in 'R'.
+  // MachineInstr::modifiesRegister is consulted on top of instModifiesReg so
+  // that implicit defs and register-mask clobbers of 'R' are not missed.
+  auto Invalidates = [&](MachineInstr &MI) {
+    if (instModifiesReg(&MI, R, Rsub, TRI) || MI.modifiesRegister(R, &TRI))
+      return true;
+    // VALU writes are masked by EXEC; see the function comment.
+    return IsVALU && MI.modifiesRegister(AMDGPU::EXEC, &TRI);
+  };
+
+  // Make sure that the value is not invalidated between the start of 'MBB'
+  // and 'Mov'.
+  const auto MovIt = Mov.getReverseIterator();
+  for (auto I = std::next(MovIt), E = MBB->instr_rend(); I != E; ++I) {
+    if (Invalidates(*I))
+      return false;
+  }
+
+  // Look for the same MOV in the predecessor, scanning backwards from its end
+  // so that the write to 'R' closest to 'MBB' is seen first.
+  MachineBasicBlock *Pred = *MBB->pred_begin();
+  MachineOperand *MatchDst = nullptr;
+  auto Match = Pred->instr_rbegin();
+  for (const auto PredEnd = Pred->instr_rend(); Match != PredEnd; ++Match) {
+    MachineInstr &Cand = *Match;
+
+    if (Cand.getOpcode() == Op) {
+      // Same opcode as 'Mov', so the named operands queried here exist.
+      MachineOperand *CandDst =
+          IsVALU ? TII->getNamedOperand(Cand, AMDGPU::OpName::vdst)
+                 : TII->getNamedOperand(Cand, AMDGPU::OpName::sdst);
+      const MachineOperand *CandSrc =
+          TII->getNamedOperand(Cand, AMDGPU::OpName::src0);
+      if (CandDst->getReg() == R && CandDst->getSubReg() == Rsub &&
+          CandSrc->isImm() && CandSrc->getImm() == Imm) {
+        MatchDst = CandDst;
+        break;
+      }
+    }
+
+    // Make sure that the value is not invalidated between the MOV in 'Pred'
+    // and the end of 'Pred'.
+    if (Invalidates(Cand))
+      return false;
+  }
+
+  // If the same MOV was not found in the predecessor, bail out.
+  if (!MatchDst)
+    return false;
+
+  // 'R' now has to stay live from the MOV in 'Pred' up to the former position
+  // of 'Mov'; drop the kill/dead flags that would contradict that.
+  MatchDst->setIsDead(false);
+  for (auto I = Pred->instr_rbegin(); I != Match; ++I)
+    I->clearRegisterKills(R, &TRI);
+  for (auto I = std::next(MovIt), E = MBB->instr_rend(); I != E; ++I)
+    I->clearRegisterKills(R, &TRI);
+
+  if (MRI.tracksLiveness() && !MBB->isLiveIn(R))
+    MBB->addLiveIn(R);
+
+  return true;
+}
+
 // Match:
 //   mov t, x
 //   mov x, y
@@ -593,6 +704,23 @@
         }
       }
 
+      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+          MI.getOpcode() == AMDGPU::S_MOV_B32 ||
+          MI.getOpcode() == AMDGPU::S_MOV_B64) {
+        // If the MOV is identical to a MOV in the immediate predecessor
+        // of MBB and also no instruction between them modifies the destination
+        // register, then remove the MOV.
+        MachineOperand &Src = MI.getOperand(1);
+        if (Src.isImm() &&
+            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
+
+          if (MBB.pred_size() == 1 && isMovRedundant(MI, MRI, TII)) {
+            MI.eraseFromParent();
+            continue;
+          }
+        }
+      }
+
       // Combine adjacent s_nops to use the immediate operand encoding how long
       // to wait.
// Index: test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -38,7 +38,6 @@ ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if -; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) Index: test/CodeGen/AMDGPU/remove-redundant-mov.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/remove-redundant-mov.mir @@ -0,0 +1,284 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: v_mov_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_single +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $vgpr2 +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: v_mov_redundant_move_multiple +# GCN: bb.1: +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 0 +# GCN-NOT: $vgpr1 = V_MOV_B32_e32 1065353216 +# GCN-NOT: $vgpr4 = V_MOV_B32_e32 3204448256 +# GCN-NOT: $vgpr3 = V_MOV_B32_e32 1056964608 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_multiple +body: | + bb.0: + renamable $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec + renamable $vgpr4 = V_MOV_B32_e32 3204448256, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec + renamable $vgpr4 = V_MOV_B32_e32 3204448256, implicit $exec + renamable $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_necessary_move_not_removed_1 +# GCN: bb.1: +# GCN: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_necessary_move_not_removed_1 +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_necessary_move_not_removed_2 +# GCN: bb.1: +# GCN: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_necessary_move_not_removed_2 +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... 
+ + + +# GCN-LABEL: name: s_mov_32_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_redundant_move_single +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_32_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $sgpr11 +# GCN-NOT: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_32_redundant_move_multiple +# GCN: bb.1: +# GCN-NOT: $sgpr11 = S_MOV_B32 0 +# GCN-NOT: $sgpr10 = S_MOV_B32 1065353216 +# GCN-NOT: $sgpr13 = S_MOV_B32 3204448256 +# GCN-NOT: $sgpr12 = S_MOV_B32 1056964608 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_redundant_move_multiple +body: | + bb.0: + renamable $sgpr10 = S_MOV_B32 1065353216, implicit $exec + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + renamable $sgpr12 = S_MOV_B32 1056964608, implicit $exec + renamable $sgpr13 = S_MOV_B32 3204448256, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + renamable $sgpr10 = S_MOV_B32 1065353216, implicit $exec + renamable $sgpr13 = S_MOV_B32 3204448256, implicit $exec + renamable $sgpr12 = S_MOV_B32 1056964608, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: s_mov_32_necessary_move_not_removed_1 +# GCN: bb.1: +# GCN: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_necessary_move_not_removed_1 +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_32_necessary_move_not_removed_2 +# GCN: bb.1: +# GCN: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_necessary_move_not_removed_2 +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + + + +# GCN-LABEL: name: s_mov_64_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $sgpr8_sgpr9 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr8_sgpr9 +--- +name: s_mov_64_redundant_move_single +body: | + bb.0: + renamable $sgpr8_sgpr9 = S_MOV_B64 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr8_sgpr9 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr8_sgpr9, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_64_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $sgpr10_sgpr11 +# GCN-NOT: $sgpr10_sgpr11 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11 +--- +name: s_mov_64_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: s_mov_64_redundant_move_multiple +# GCN: bb.1: +# GCN-NOT: $sgpr10_sgpr11 = S_MOV_B64 0 +# GCN-NOT: $sgpr12_sgpr13 = S_MOV_B64 1065353216 +# GCN-NOT: $sgpr6_sgpr7 = S_MOV_B64 3204448256 +# GCN-NOT: $sgpr8_sgpr9 = S_MOV_B64 1056964608 +# GCN: $sgpr14_sgpr15 = S_MOV_B64 $sgpr8_sgpr9 +--- +name: s_mov_64_redundant_move_multiple +body: | + bb.0: + renamable $sgpr12_sgpr13 = S_MOV_B64 1065353216, implicit $exec + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + renamable $sgpr8_sgpr9 = S_MOV_B64 1056964608, implicit $exec + renamable $sgpr6_sgpr7 = S_MOV_B64 3204448256, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + renamable $sgpr12_sgpr13 = S_MOV_B64 1065353216, implicit $exec + renamable $sgpr6_sgpr7 = S_MOV_B64 3204448256, implicit $exec + renamable $sgpr8_sgpr9 = S_MOV_B64 1056964608, implicit $exec + $sgpr14_sgpr15 = S_MOV_B64 $sgpr8_sgpr9, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_64_necessary_move_not_removed_1 +# GCN: bb.1: +# GCN: $sgpr10_sgpr11 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11 +--- +name: s_mov_64_necessary_move_not_removed_1 +body: | + bb.0: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: s_mov_64_necessary_move_not_removed_2 +# GCN: bb.1: +# GCN: $sgpr10_sgpr11 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11 +--- +name: s_mov_64_necessary_move_not_removed_2 +body: | + bb.0: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $exec, implicit $exec +...