Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -431,6 +431,113 @@
   return TargetInstrInfo::RegSubRegPair(Reg, Sub);
 }
 
+// Match:
+//   bb0:
+//     ..
+//     mov r, imm
+//     ..
+//     branch bb1
+//
+//   bb1:
+//     ; predecessors: %bb.0
+//     ..
+//     mov r, imm    <== redundant mov
+//
+// Returns true if 'Mov' (an immediate V_MOV_B32_e32, S_MOV_B32 or S_MOV_B64
+// in a block with exactly one predecessor) rewrites a value that is already
+// in its destination register, so the caller may erase it.
+//
+// On success this also updates liveness bookkeeping for the register: it is
+// added to the live-ins of the block (when liveness is tracked) and kill/dead
+// markers between the two MOVs are dropped.
+//
+static bool isMovRedundant(MachineInstr &Mov, MachineRegisterInfo &MRI,
+                           const SIInstrInfo *TII) {
+  const unsigned Op = Mov.getOpcode();
+  const bool IsVALU = Op == AMDGPU::V_MOV_B32_e32;
+  assert(IsVALU || Op == AMDGPU::S_MOV_B32 || Op == AMDGPU::S_MOV_B64);
+
+  MachineBasicBlock *MBB = Mov.getParent();
+  assert(MBB->pred_size() == 1);
+
+  // A block that is its own sole predecessor would make the predecessor
+  // scan below run into 'Mov' itself and report it as redundant.
+  MachineBasicBlock *Pred = *MBB->pred_begin();
+  if (Pred == MBB)
+    return false;
+
+  const MachineOperand *DstOperand =
+      IsVALU ? TII->getNamedOperand(Mov, AMDGPU::OpName::vdst)
+             : TII->getNamedOperand(Mov, AMDGPU::OpName::sdst);
+  const MachineOperand *SrcOperand =
+      TII->getNamedOperand(Mov, AMDGPU::OpName::src0);
+  assert(SrcOperand->isImm());
+
+  const unsigned R = DstOperand->getReg();
+  const unsigned Rsub = DstOperand->getSubReg();
+  const int64_t Imm = SrcOperand->getImm();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+
+  // An instruction invalidates the match if it writes 'R'. A VALU MOV only
+  // writes the lanes enabled in EXEC, so for V_MOV_B32 an EXEC update
+  // between the two MOVs may enable lanes the first MOV never wrote.
+  auto Clobbers = [&](const MachineInstr &MI) {
+    return instModifiesReg(&MI, R, Rsub, TRI) ||
+           (IsVALU && MI.modifiesRegister(AMDGPU::EXEC, &TRI));
+  };
+
+  // Nothing ahead of 'Mov' in 'MBB' may clobber the value.
+  for (auto I = std::next(Mov.getReverseIterator()), E = MBB->instr_rend();
+       I != E; ++I) {
+    if (Clobbers(*I))
+      return false;
+  }
+
+  // Walk the predecessor bottom-up looking for the identical MOV; give up as
+  // soon as something clobbers the value first.
+  MachineInstr *Match = nullptr;
+  for (auto I = Pred->instr_rbegin(), E = Pred->instr_rend(); I != E; ++I) {
+    MachineInstr &Instr = *I;
+    if (Instr.getOpcode() == Op) {
+      // Same opcode as 'Mov', so the same named operands are present.
+      const MachineOperand *InstrDst =
+          IsVALU ? TII->getNamedOperand(Instr, AMDGPU::OpName::vdst)
+                 : TII->getNamedOperand(Instr, AMDGPU::OpName::sdst);
+      const MachineOperand *InstrSrc =
+          TII->getNamedOperand(Instr, AMDGPU::OpName::src0);
+      if (InstrDst->getReg() == R && InstrDst->getSubReg() == Rsub &&
+          InstrSrc->isImm() && InstrSrc->getImm() == Imm) {
+        Match = &Instr;
+        break;
+      }
+    }
+
+    if (Clobbers(Instr))
+      return false;
+  }
+
+  // If the same MOV was not found in the predecessor, bail out.
+  if (!Match)
+    return false;
+
+  // 'R' now stays live from 'Match' up to the uses that followed 'Mov', so
+  // drop kill/dead markers that would end its live range in between.
+  Match->clearRegisterDeads(R);
+  for (MachineBasicBlock::instr_iterator I = std::next(Match->getIterator()),
+                                         E = Pred->instr_end();
+       I != E; ++I)
+    I->clearRegisterKills(R, &TRI);
+  for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+                                         E = Mov.getIterator();
+       I != E; ++I)
+    I->clearRegisterKills(R, &TRI);
+
+  if (MRI.tracksLiveness() && !MBB->isLiveIn(R))
+    MBB->addLiveIn(R);
+
+  return true;
+}
+
 // Match:
 // mov t, x
 // mov x, y
@@ -593,6 +700,23 @@
       }
     }
 
+    if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+        MI.getOpcode() == AMDGPU::S_MOV_B32 ||
+        MI.getOpcode() == AMDGPU::S_MOV_B64) {
+      // If the MOV is identical to a MOV in the immediate predecessor
+      // of MBB and also no instruction between them modifies the destination
+      // register, then remove the MOV.
+      MachineOperand &Src = MI.getOperand(1);
+      if (Src.isImm() &&
+          TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
+
+        if (MBB.pred_size() == 1 && isMovRedundant(MI, MRI, TII)) {
+          MI.eraseFromParent();
+          continue;
+        }
+      }
+    }
+
     // Combine adjacent s_nops to use the immediate operand encoding how long
     // to wait.
// Index: test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -38,7 +38,6 @@ ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if -; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) Index: test/CodeGen/AMDGPU/mov-opt.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/mov-opt.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs | FileCheck %s + +; Check that the redundant immediate MOV instruction +; (by-product of handling phi nodes) is optimized away +; and not found in bb1. + +; CHECK-LABEL: {{^}}mov_opt: +; CHECK: v_mov_b32_e32 {{v[0-9]+}}, 1.0 +; CHECK: %bb.1: +; CHECK-NOT: v_mov_b32_e32 {{v[0-9]+}}, 1.0 +; CHECK: %bb.2: + +define void @mov_opt(i32, i32) local_unnamed_addr #0 { +.entry: + %2 = add i32 %1, %0 + br i1 undef, label %3, label %.critedge + +3: ; preds = %.entry + br i1 undef, label %4, label %.critedge + +4: ; preds = %3 + switch i32 undef, label %8 [ + i32 0, label %5 + i32 1, label %6 + i32 2, label %7 + ] + +5: ; preds = %4 + br label %8 + +6: ; preds = %4 + br label %8 + +7: ; preds = %4 + br label %8 + +8: ; preds = %7, %6, %5, %4 + %9 = add i32 0, %2 + %10 = lshr i32 %9, 1 + %11 = getelementptr <{ [4294967295 x i32] }>, <{ [4294967295 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %10 + %12 = ptrtoint i32 addrspace(6)* %11 to i32 + %13 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %12, i32 0) + %14 = lshr i32 %13, 0 + %15 = lshr i32 %14, 3 + %16 = and i32 %15, 7 + switch i32 %16, label %17 [ + i32 0, label %17 + i32 1, label %18 + i32 2, label %20 + i32 3, label %21 + ] + 
+17: ; preds = %8, %8 + br label %.critedge + +18: ; preds = %8 + %19 = fsub reassoc nnan nsz arcp contract float 1.000000e+00, undef + br label %.critedge + +20: ; preds = %8 + br label %.critedge + +21: ; preds = %8 + %22 = fsub reassoc nnan nsz arcp contract float 1.000000e+00, undef + br label %.critedge + +.critedge: ; preds = %21, %20, %18, %17, %3, %.entry + %__llpc_output_proxy_.3.0 = phi float [ 1.000000e+00, %3 ], [ undef, %21 ], [ undef, %20 ], [ undef, %18 ], [ 0.000000e+00, %17 ], [ 1.000000e+00, %.entry ] + %__llpc_output_proxy_.3.1 = phi float [ 0.000000e+00, %3 ], [ 0.000000e+00, %21 ], [ 0.000000e+00, %20 ], [ %19, %18 ], [ 0.000000e+00, %17 ], [ 0.000000e+00, %.entry ] + %__llpc_output_proxy_.3.3 = phi float [ 0.000000e+00, %3 ], [ %22, %21 ], [ undef, %20 ], [ undef, %18 ], [ undef, %17 ], [ 0.000000e+00, %.entry ] + call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %__llpc_output_proxy_.3.0, float %__llpc_output_proxy_.3.1, float undef, float %__llpc_output_proxy_.3.3, i1 immarg false, i1 immarg false) #2 + ret void +} + +; Function Attrs: inaccessiblememonly nounwind +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0 + +; Function Attrs: nounwind readnone +declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #1 + +attributes #0 = { inaccessiblememonly nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } Index: test/CodeGen/AMDGPU/remove-redundant-mov.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/remove-redundant-mov.mir @@ -0,0 +1,284 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: v_mov_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_single +body: 
| + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $vgpr2 +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_redundant_move_multiple +# GCN: bb.1: +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 0 +# GCN-NOT: $vgpr1 = V_MOV_B32_e32 1065353216 +# GCN-NOT: $vgpr4 = V_MOV_B32_e32 3204448256 +# GCN-NOT: $vgpr3 = V_MOV_B32_e32 1056964608 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_multiple +body: | + bb.0: + renamable $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec + renamable $vgpr4 = V_MOV_B32_e32 3204448256, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec + renamable $vgpr4 = V_MOV_B32_e32 3204448256, implicit $exec + renamable $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: v_mov_necessary_move_not_removed_1 +# GCN: bb.1: +# GCN: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_necessary_move_not_removed_1 +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_necessary_move_not_removed_2 +# GCN: bb.1: +# GCN: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_necessary_move_not_removed_2 +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + + + +# GCN-LABEL: name: s_mov_32_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_redundant_move_single +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_32_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $sgpr11 +# GCN-NOT: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: s_mov_32_redundant_move_multiple +# GCN: bb.1: +# GCN-NOT: $sgpr11 = S_MOV_B32 0 +# GCN-NOT: $sgpr10 = S_MOV_B32 1065353216 +# GCN-NOT: $sgpr13 = S_MOV_B32 3204448256 +# GCN-NOT: $sgpr12 = S_MOV_B32 1056964608 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_redundant_move_multiple +body: | + bb.0: + renamable $sgpr10 = S_MOV_B32 1065353216, implicit $exec + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + renamable $sgpr12 = S_MOV_B32 1056964608, implicit $exec + renamable $sgpr13 = S_MOV_B32 3204448256, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + renamable $sgpr10 = S_MOV_B32 1065353216, implicit $exec + renamable $sgpr13 = S_MOV_B32 3204448256, implicit $exec + renamable $sgpr12 = S_MOV_B32 1056964608, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_32_necessary_move_not_removed_1 +# GCN: bb.1: +# GCN: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_necessary_move_not_removed_1 +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_32_necessary_move_not_removed_2 +# GCN: bb.1: +# GCN: $sgpr11 = S_MOV_B32 1065353216 +# GCN: $sgpr12 = S_MOV_B32 $sgpr11 +--- +name: s_mov_32_necessary_move_not_removed_2 +body: | + bb.0: + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr11 = S_MOV_B32 0, implicit $exec + renamable $sgpr11 = S_MOV_B32 1065353216, implicit $exec + $sgpr12 = S_MOV_B32 $sgpr11, implicit $exec, implicit $exec +... 
+ + + +# GCN-LABEL: name: s_mov_64_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $sgpr8_sgpr9 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr8_sgpr9 +--- +name: s_mov_64_redundant_move_single +body: | + bb.0: + renamable $sgpr8_sgpr9 = S_MOV_B64 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr8_sgpr9 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr8_sgpr9, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_64_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $sgpr10_sgpr11 +# GCN-NOT: $sgpr10_sgpr11 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11 +--- +name: s_mov_64_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_64_redundant_move_multiple +# GCN: bb.1: +# GCN-NOT: $sgpr10_sgpr11 = S_MOV_B64 0 +# GCN-NOT: $sgpr12_sgpr13 = S_MOV_B64 1065353216 +# GCN-NOT: $sgpr6_sgpr7 = S_MOV_B64 3204448256 +# GCN-NOT: $sgpr8_sgpr9 = S_MOV_B64 1056964608 +# GCN: $sgpr14_sgpr15 = S_MOV_B64 $sgpr8_sgpr9 +--- +name: s_mov_64_redundant_move_multiple +body: | + bb.0: + renamable $sgpr12_sgpr13 = S_MOV_B64 1065353216, implicit $exec + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + renamable $sgpr8_sgpr9 = S_MOV_B64 1056964608, implicit $exec + renamable $sgpr6_sgpr7 = S_MOV_B64 3204448256, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + renamable $sgpr12_sgpr13 = S_MOV_B64 1065353216, implicit $exec + renamable $sgpr6_sgpr7 = S_MOV_B64 3204448256, implicit $exec + renamable $sgpr8_sgpr9 = S_MOV_B64 1056964608, implicit $exec + $sgpr14_sgpr15 = S_MOV_B64 $sgpr8_sgpr9, implicit $exec, implicit $exec +... 
+ +# GCN-LABEL: name: s_mov_64_necessary_move_not_removed_1 +# GCN: bb.1: +# GCN: $sgpr10_sgpr11 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11 +--- +name: s_mov_64_necessary_move_not_removed_1 +body: | + bb.0: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: s_mov_64_necessary_move_not_removed_2 +# GCN: bb.1: +# GCN: $sgpr10_sgpr11 = S_MOV_B64 1065353216 +# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11 +--- +name: s_mov_64_necessary_move_not_removed_2 +body: | + bb.0: + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $sgpr10_sgpr11 = S_MOV_B64 0, implicit $exec + renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216, implicit $exec + $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $exec, implicit $exec +...