Index: lib/CodeGen/MachineBasicBlock.cpp
===================================================================
--- lib/CodeGen/MachineBasicBlock.cpp
+++ lib/CodeGen/MachineBasicBlock.cpp
@@ -1439,6 +1439,20 @@
     }
   }
 
+  // If we reached the end, it is safe to clobber Reg at the end of a block if
+  // no successor has it live in.
+  if (I == end()) {
+    for (MachineBasicBlock *S : successors()) {
+      for (MCSubRegIterator SubReg(Reg, TRI, /*IncludeSelf*/true);
+           SubReg.isValid(); ++SubReg) {
+        if (S->isLiveIn(*SubReg))
+          return LQR_Live;
+      }
+    }
+
+    return LQR_Dead;
+  }
+
   // At this point we have no idea of the liveness of the register.
   return LQR_Unknown;
 }
Index: test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
===================================================================
--- test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
+++ test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
@@ -318,14 +318,74 @@
 
 ---
 
+# We know this is OK because vcc isn't live out of the block.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout
+tracksRegLiveness: true
+
+body: |
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout
+  ; GCN: bb.0:
+  ; GCN: successors: %bb.1(0x80000000)
+  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+  ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+  ; GCN: bb.1:
+  ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+  bb.0:
+    successors: %bb.1
+
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    %0:sreg_32_xm0 = S_MOV_B32 12345
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_NOP 0
+    S_NOP 0
+
+  bb.1:
+    S_ENDPGM implicit %2
+
+...
+
+---
+
 # We know this is OK because vcc isn't live out of the block, even
-# though it had a defined value
+# though it had a defined but unused value.
 
-name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def
 tracksRegLiveness: true
 
 body: |
-  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x80000000)
   ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
@@ -336,10 +396,12 @@
   bb.0:
     successors: %bb.1
 
-    $vcc = S_MOV_B64 -1
+    S_NOP 0, implicit-def $vcc
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+    S_NOP 0
+    S_NOP 0
 
   bb.1:
     S_ENDPGM implicit %2
Index: test/CodeGen/AMDGPU/split-scalar-i64-add.ll
===================================================================
--- test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -10,8 +10,8 @@
 ; FIXME: SIShrinkInstructions should force immediate fold.
 
 ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
-; SI: s_movk_i32 [[K:s[0-9]+]], 0x18f
-; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}}
+; SI: v_mov_b32_e32 [[V_VAL:v[0-9]+]], s
+; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, [[V_VAL]]
 ; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
 define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
   %v.val = load volatile i32, i32 addrspace(1)* %in
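
Note on the MachineBasicBlock::computeRegisterLiveness() change above: when the
scan walks off the end of the block, the query now checks each successor's
live-in list (including subregisters of Reg) and reports LQR_Dead rather than
LQR_Unknown when nothing is live in. In the new MIR test, the 30 leading S_NOPs
keep the backward scan from reaching the block entry within the default
10-instruction search window, so the dead verdict can only come from this new
end-of-block check, which is what allows the shrink to the vcc-clobbering
V_ADD_I32_e32 form. A minimal sketch of a caller relying on the result follows;
the helper name and call site are illustrative only and are not taken from the
patch or from SIShrinkInstructions itself.

// Sketch only: assumes the usual LLVM CodeGen headers and the AMDGPU register
// enum. Returns true when $vcc can be clobbered at MI; with the patch above
// this now also succeeds at the end of a block whose successors do not list
// vcc (or a subregister of it) as live-in, instead of giving up with
// LQR_Unknown.
static bool vccIsDeadAt(const MachineBasicBlock &MBB,
                        MachineBasicBlock::const_iterator MI,
                        const TargetRegisterInfo *TRI) {
  return MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI) ==
         MachineBasicBlock::LQR_Dead;
}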