Index: lib/Target/AMDGPU/SIShrinkInstructions.cpp =================================================================== --- lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -431,6 +431,84 @@ return TargetInstrInfo::RegSubRegPair(Reg, Sub); } +// Match: +// bb0: +// .. +// mov r, imm +// .. +// branch bb1 +// +// bb1: +// ; predecessors: %bb.0 +// .. +// mov r, imm <== redundant mov +// +// +// Returns true if the mov can be removed. +// +static bool isMovRedundant(MachineInstr &Mov, MachineRegisterInfo &MRI, + const SIInstrInfo *TII) { + + assert(Mov.getOpcode() == AMDGPU::V_MOV_B32_e32 || + Mov.getOpcode() == AMDGPU::S_MOV_B32 || + Mov.getOpcode() == AMDGPU::S_MOV_B64); + assert(Mov.getParent()->pred_size() == 1); + + auto DstOperand = Mov.getOpcode() == AMDGPU::V_MOV_B32_e32 + ? TII->getNamedOperand(Mov, AMDGPU::OpName::vdst) + : TII->getNamedOperand(Mov, AMDGPU::OpName::sdst); + + unsigned R = DstOperand->getReg(); + unsigned Rsub = DstOperand->getSubReg(); + auto MBB = Mov.getParent(); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + + // Make sure that 'R' is not modified between the MOVs in 'MBB'. + auto I = std::next(Mov.getReverseIterator()), E = MBB->instr_rend(); + for (; I != E; ++I) { + if (instModifiesReg(&*I, R, Rsub, TRI)) + return false; + } + + unsigned Op = Mov.getOpcode(); + auto SrcOperand = TII->getNamedOperand(Mov, AMDGPU::OpName::src0); + assert(SrcOperand->isImm()); + const int64_t Imm = SrcOperand->getImm(); + + // Look for the same MOV in the predecessor. + auto Pred = *MBB->pred_begin(); + I = Pred->instr_rbegin(), E = Pred->instr_rend(); + for (; I != E; ++I) { + + MachineInstr *Instr = &*I; + auto InstrSrcOp = TII->getNamedOperand(*Instr, AMDGPU::OpName::src0); + auto InstrDstOp = Instr->getOpcode() == AMDGPU::V_MOV_B32_e32 + ? 
TII->getNamedOperand(*Instr, AMDGPU::OpName::vdst) + : TII->getNamedOperand(*Instr, AMDGPU::OpName::sdst); + + if (Instr->getOpcode() == Op && + InstrDstOp->getReg() == R && + InstrDstOp->getSubReg() == Rsub && + InstrSrcOp->isImm() && + InstrSrcOp->getImm() == Imm) { + break; + } + + // Make sure that 'R' is not modified between the MOVs in 'Pred'. + if (instModifiesReg(Instr, R, Rsub, TRI)) + return false; + } + + // If the same MOV was not found in the predecessor, bail out. + if (I == E) + return false; + + if (MRI.tracksLiveness() && !MBB->isLiveIn(R)) + MBB->addLiveIn(R); + + return true; +} + // Match: // mov t, x // mov x, y @@ -593,6 +671,23 @@ } } + if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || + MI.getOpcode() == AMDGPU::S_MOV_B32 || + MI.getOpcode() == AMDGPU::S_MOV_B64) { + // If the MOV is identical to a MOV in the immediate predecessor + // of MBB and also no instruction between them modifies the destination + // register, then remove the MOV. + MachineOperand &Src = MI.getOperand(1); + if (Src.isImm() && + TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) { + + if (MBB.pred_size() == 1 && isMovRedundant(MI, MRI, TII)) { + MI.eraseFromParent(); + continue; + } + } + } + // Combine adjacent s_nops to use the immediate operand encoding how long // to wait. 
// Index: test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -38,7 +38,6 @@ ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if -; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) Index: test/CodeGen/AMDGPU/mov-opt.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/mov-opt.ll @@ -0,0 +1,133 @@ +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs | FileCheck %s + +; Check that redundant immediate MOV instructions are +; optimized away and not found in bb1. + +; CHECK-LABEL: {{^}}mov_opt: +; CHECK: v_mov_b32_e32 {{v[0-9]+}}, 0 +; CHECK: v_mov_b32_e32 {{v[0-9]+}}, 1.0 +; CHECK: BB0_1: +; CHECK-NOT: v_mov_b32_e32 {{v[0-9]+}}, 0 +; CHECK-NOT: v_mov_b32_e32 {{v[0-9]+}}, 1.0 +; CHECK: BB0_2: + +define void @mov_opt(i32 inreg, i32 inreg, <4 x i32> inreg, i32 inreg %inp1, i32 inreg %inp2, i32 inreg %inp3, i32 inreg, i32 inreg, i32 inreg, i32) local_unnamed_addr #5 { +.entry: + %7 = call i64 @llvm.amdgcn.s.getpc() #10 + %8 = bitcast i64 %7 to <2 x i32> + %9 = insertelement <2 x i32> %8, i32 %inp1, i32 0 + %10 = bitcast <2 x i32> %9 to i64 + %11 = inttoptr i64 %10 to [4294967295 x i8] addrspace(4)* + %12 = insertelement <2 x i32> %8, i32 %inp2, i32 0 + %13 = insertelement <2 x i32> %8, i32 %inp3, i32 0 + %14 = bitcast <2 x i32> %13 to i64 + %15 = add i32 %6, %4 + %16 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %11, i64 0, i64 16 + %17 = bitcast i8 addrspace(4)* %16 to <4 x i32> addrspace(4)*, !amdgpu.uniform !6 + %18 = load <4 x i32>, <4 x i32> addrspace(4)* %17, align 16 + %19 = inttoptr i64 %10 to <4 x i32> 
addrspace(4)* + %20 = load <4 x i32>, <4 x i32> addrspace(4)* %19, align 16 + %21 = inttoptr i64 %14 to <4 x i32> addrspace(4)* + %22 = load <4 x i32>, <4 x i32> addrspace(4)* %21, align 16 + %23 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %22, i32 192, i32 0) + %24 = bitcast i32 %23 to float + %25 = fcmp ogt float %24, 0.000000e+00 + br i1 %25, label %26, label %.critedge + +26: ; preds = %.entry + %27 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %20, i32 544, i32 0) + %28 = bitcast i32 %27 to float + %29 = fcmp oeq float %28, 0.000000e+00 + br i1 %29, label %30, label %.critedge + +30: ; preds = %26 + %.0.vec.extract = extractelement <4 x i32> %2, i32 0 + switch i32 %.0.vec.extract, label %43 [ + i32 0, label %31 + i32 1, label %35 + i32 2, label %39 + ] + +31: ; preds = %30 + %32 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %22, i32 0, i32 0) + %33 = bitcast i32 %32 to float + %34 = fptoui float %33 to i32 + br label %43 + +35: ; preds = %30 + %36 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %22, i32 4, i32 0) + %37 = bitcast i32 %36 to float + %38 = fptoui float %37 to i32 + br label %43 + +39: ; preds = %30 + %40 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %22, i32 8, i32 0) + %41 = bitcast i32 %40 to float + %42 = fptoui float %41 to i32 + br label %43 + +43: ; preds = %39, %35, %31, %30 + %.0540 = phi i32 [ 0, %30 ], [ %42, %39 ], [ %38, %35 ], [ %34, %31 ] + %44 = add i32 %.0540, %15 + %45 = lshr i32 %44, 1 + %46 = shl i32 %44, 4 + %47 = and i32 %46, 16 + %48 = getelementptr <{ [4294967295 x i32] }>, <{ [4294967295 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %45 + %49 = ptrtoint i32 addrspace(6)* %48 to i32 + %50 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %18, i32 %49, i32 0) + %51 = lshr i32 %50, %47 + %52 = lshr i32 %51, 3 + %53 = and i32 %52, 7 + %54 = lshr i32 %51, 6 + %55 = and i32 %54, 1023 + %56 = uitofp i32 %55 to float + %57 = fmul reassoc nnan nsz arcp contract float %56, 0x3F50040100000000 + 
switch i32 %53, label %58 [ + i32 3, label %64 + i32 1, label %60 + i32 2, label %62 + ] + +58: ; preds = %43 + %59 = fsub reassoc nnan nsz arcp contract float 1.000000e+00, %57 + br label %.critedge + +60: ; preds = %43 + %61 = fsub reassoc nnan nsz arcp contract float 1.000000e+00, %57 + br label %.critedge + +62: ; preds = %43 + %63 = fsub reassoc nnan nsz arcp contract float 1.000000e+00, %57 + br label %.critedge + +64: ; preds = %43 + %65 = fsub reassoc nnan nsz arcp contract float 1.000000e+00, %57 + br label %.critedge + +.critedge: ; preds = %64, %62, %60, %58, %26, %.entry + %__llpc_output_proxy_.3.0 = phi float [ 1.000000e+00, %26 ], [ %65, %64 ], [ %63, %62 ], [ %61, %60 ], [ %59, %58 ], [ 1.000000e+00, %.entry ] + %__llpc_output_proxy_.3.1 = phi float [ 0.000000e+00, %26 ], [ %65, %64 ], [ %63, %62 ], [ %61, %60 ], [ %59, %58 ], [ 0.000000e+00, %.entry ] + %__llpc_output_proxy_.3.2 = phi float [ 0.000000e+00, %26 ], [ %65, %64 ], [ %63, %62 ], [ %61, %60 ], [ %59, %58 ], [ 0.000000e+00, %.entry ] + %__llpc_output_proxy_.3.3 = phi float [ 0.000000e+00, %26 ], [ %65, %64 ], [ %63, %62 ], [ %61, %60 ], [ %59, %58 ], [ 0.000000e+00, %.entry ] + call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %__llpc_output_proxy_.3.0, float %__llpc_output_proxy_.3.1, float %__llpc_output_proxy_.3.2, float %__llpc_output_proxy_.3.3, i1 immarg false, i1 immarg false) #11 + ret void +} + + +attributes #0 = { argmemonly nounwind } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind readnone } +attributes #3 = { convergent nounwind readnone } +attributes #4 = { convergent nounwind readnone speculatable } +attributes #5 = { nounwind "target-features"="" } +attributes #6 = { nounwind "InitialPSInputAddr"="3842" "target-features"="" } +attributes #7 = { nounwind readonly } +attributes #8 = { nounwind } + +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #8 + +declare i64 
@llvm.amdgcn.s.getpc() #1 + +declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #2 + +!6 = !{} Index: test/CodeGen/AMDGPU/remove-redundant-mov.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/remove-redundant-mov.mir @@ -0,0 +1,284 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: v_mov_redundant_move_single +# GCN: bb.1: +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_single +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... + +# GCN-LABEL: name: v_mov_redundant_move_liveness +# GCN: bb.1: +# GCN: liveins: $vgpr2 +# GCN-NOT: $vgpr2 = V_MOV_B32_e32 1065353216 +# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2 +--- +name: v_mov_redundant_move_liveness +tracksRegLiveness: true +body: | + bb.0: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + S_BRANCH %bb.1 + + bb.1: + renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec + $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec +... 
+
+# GCN-LABEL: name: v_mov_redundant_move_multiple
+# GCN: bb.1:
+# GCN-NOT: $vgpr2 = V_MOV_B32_e32 0
+# GCN-NOT: $vgpr1 = V_MOV_B32_e32 1065353216
+# GCN-NOT: $vgpr4 = V_MOV_B32_e32 3204448256
+# GCN-NOT: $vgpr3 = V_MOV_B32_e32 1056964608
+# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2
+---
+name: v_mov_redundant_move_multiple
+body: |
+  bb.0:
+    renamable $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec
+    renamable $vgpr4 = V_MOV_B32_e32 3204448256, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
+    renamable $vgpr4 = V_MOV_B32_e32 3204448256, implicit $exec
+    renamable $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+...
+
+# GCN-LABEL: name: v_mov_necessary_move_not_removed_1
+# GCN: bb.1:
+# GCN: $vgpr2 = V_MOV_B32_e32 1065353216
+# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2
+---
+name: v_mov_necessary_move_not_removed_1
+body: |
+  bb.0:
+    renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+...
+
+# GCN-LABEL: name: v_mov_necessary_move_not_removed_2
+# GCN: bb.1:
+# GCN: $vgpr2 = V_MOV_B32_e32 1065353216
+# GCN: $vgpr3 = V_MOV_B32_e32 $vgpr2
+---
+name: v_mov_necessary_move_not_removed_2
+body: |
+  bb.0:
+    renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+...
+
+# The same set of cases for the scalar 32-bit moves.
+
+# GCN-LABEL: name: s_mov_32_redundant_move_single
+# GCN: bb.1:
+# GCN-NOT: $sgpr11 = S_MOV_B32 1065353216
+# GCN: $sgpr12 = S_MOV_B32 $sgpr11
+---
+name: s_mov_32_redundant_move_single
+body: |
+  bb.0:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    $sgpr12 = S_MOV_B32 $sgpr11
+...
+
+# GCN-LABEL: name: s_mov_32_redundant_move_liveness
+# GCN: bb.1:
+# GCN: liveins: $sgpr11
+# GCN-NOT: $sgpr11 = S_MOV_B32 1065353216
+# GCN: $sgpr12 = S_MOV_B32 $sgpr11
+---
+name: s_mov_32_redundant_move_liveness
+tracksRegLiveness: true
+body: |
+  bb.0:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    $sgpr12 = S_MOV_B32 $sgpr11
+...
+
+# GCN-LABEL: name: s_mov_32_redundant_move_multiple
+# GCN: bb.1:
+# GCN-NOT: $sgpr11 = S_MOV_B32 0
+# GCN-NOT: $sgpr10 = S_MOV_B32 1065353216
+# GCN-NOT: $sgpr13 = S_MOV_B32 3204448256
+# GCN-NOT: $sgpr12 = S_MOV_B32 1056964608
+# GCN: $sgpr12 = S_MOV_B32 $sgpr11
+---
+name: s_mov_32_redundant_move_multiple
+body: |
+  bb.0:
+    renamable $sgpr10 = S_MOV_B32 1065353216
+    renamable $sgpr11 = S_MOV_B32 0
+    renamable $sgpr12 = S_MOV_B32 1056964608
+    renamable $sgpr13 = S_MOV_B32 3204448256
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr11 = S_MOV_B32 0
+    renamable $sgpr10 = S_MOV_B32 1065353216
+    renamable $sgpr13 = S_MOV_B32 3204448256
+    renamable $sgpr12 = S_MOV_B32 1056964608
+    $sgpr12 = S_MOV_B32 $sgpr11
+...
+
+# GCN-LABEL: name: s_mov_32_necessary_move_not_removed_1
+# GCN: bb.1:
+# GCN: $sgpr11 = S_MOV_B32 1065353216
+# GCN: $sgpr12 = S_MOV_B32 $sgpr11
+---
+name: s_mov_32_necessary_move_not_removed_1
+body: |
+  bb.0:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    renamable $sgpr11 = S_MOV_B32 0
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    $sgpr12 = S_MOV_B32 $sgpr11
+...
+
+# GCN-LABEL: name: s_mov_32_necessary_move_not_removed_2
+# GCN: bb.1:
+# GCN: $sgpr11 = S_MOV_B32 1065353216
+# GCN: $sgpr12 = S_MOV_B32 $sgpr11
+---
+name: s_mov_32_necessary_move_not_removed_2
+body: |
+  bb.0:
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr11 = S_MOV_B32 0
+    renamable $sgpr11 = S_MOV_B32 1065353216
+    $sgpr12 = S_MOV_B32 $sgpr11
+...
+
+# The same set of cases for the scalar 64-bit moves.
+
+# GCN-LABEL: name: s_mov_64_redundant_move_single
+# GCN: bb.1:
+# GCN-NOT: $sgpr8_sgpr9 = S_MOV_B64 1065353216
+# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr8_sgpr9
+---
+name: s_mov_64_redundant_move_single
+body: |
+  bb.0:
+    renamable $sgpr8_sgpr9 = S_MOV_B64 1065353216
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr8_sgpr9 = S_MOV_B64 1065353216
+    $sgpr6_sgpr7 = S_MOV_B64 $sgpr8_sgpr9
+...
+
+# GCN-LABEL: name: s_mov_64_redundant_move_liveness
+# GCN: bb.1:
+# GCN: liveins: $sgpr10_sgpr11
+# GCN-NOT: $sgpr10_sgpr11 = S_MOV_B64 1065353216
+# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11
+---
+name: s_mov_64_redundant_move_liveness
+tracksRegLiveness: true
+body: |
+  bb.0:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216
+    $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11
+...
+
+# GCN-LABEL: name: s_mov_64_redundant_move_multiple
+# GCN: bb.1:
+# GCN-NOT: $sgpr10_sgpr11 = S_MOV_B64 0
+# GCN-NOT: $sgpr12_sgpr13 = S_MOV_B64 1065353216
+# GCN-NOT: $sgpr6_sgpr7 = S_MOV_B64 3204448256
+# GCN-NOT: $sgpr8_sgpr9 = S_MOV_B64 1056964608
+# GCN: $sgpr14_sgpr15 = S_MOV_B64 $sgpr8_sgpr9
+---
+name: s_mov_64_redundant_move_multiple
+body: |
+  bb.0:
+    renamable $sgpr12_sgpr13 = S_MOV_B64 1065353216
+    renamable $sgpr10_sgpr11 = S_MOV_B64 0
+    renamable $sgpr8_sgpr9 = S_MOV_B64 1056964608
+    renamable $sgpr6_sgpr7 = S_MOV_B64 3204448256
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 0
+    renamable $sgpr12_sgpr13 = S_MOV_B64 1065353216
+    renamable $sgpr6_sgpr7 = S_MOV_B64 3204448256
+    renamable $sgpr8_sgpr9 = S_MOV_B64 1056964608
+    $sgpr14_sgpr15 = S_MOV_B64 $sgpr8_sgpr9
+...
+
+# GCN-LABEL: name: s_mov_64_necessary_move_not_removed_1
+# GCN: bb.1:
+# GCN: $sgpr10_sgpr11 = S_MOV_B64 1065353216
+# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11
+---
+name: s_mov_64_necessary_move_not_removed_1
+body: |
+  bb.0:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216
+    renamable $sgpr10_sgpr11 = S_MOV_B64 0
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216
+    $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11
+...
+
+# GCN-LABEL: name: s_mov_64_necessary_move_not_removed_2
+# GCN: bb.1:
+# GCN: $sgpr10_sgpr11 = S_MOV_B64 1065353216
+# GCN: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11
+---
+name: s_mov_64_necessary_move_not_removed_2
+body: |
+  bb.0:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216
+    S_BRANCH %bb.1
+
+  bb.1:
+    renamable $sgpr10_sgpr11 = S_MOV_B64 0
+    renamable $sgpr10_sgpr11 = S_MOV_B64 1065353216
+    $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11
+...