Summary of this patch (free text before the first file header; patch tools skip it):

  * LiveRangeEdit.h: the declaration of LiveRangeEdit::allUsesAvailableAt is
    moved from next to scanRemattable() to just after the Remat struct. The
    signature is unchanged. NOTE(review): presumably this moves it into a
    section that RegisterCoalescer can call; confirm against the full header.
  * RegisterCoalescer.cpp: adds RegisterCoalescer::allUsesAvailableAt, which
    constructs a temporary LiveRangeEdit and forwards to its
    allUsesAvailableAt, and adds an early "return false" before the
    rematerialized instruction is built when
    allUsesAvailableAt(DefMI, ValNo->def, CopyIdx) does not hold.
  * coalescer-remat-dead-use.mir: new test with four functions; three expect
    V_MOV_B32 to stay in place (source killed in the instruction, killed
    after it, or alive but otherwise unused) and one expects it to be
    rematerialized into the COPY (source still used at the COPY).

diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -97,11 +97,6 @@
   /// scanRemattable - Identify the Parent values that may rematerialize.
   void scanRemattable(AAResults *aa);
 
-  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
-  /// OrigIdx are also available with the same value at UseIdx.
-  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
-                          SlotIndex UseIdx) const;
-
   /// foldAsLoad - If LI has a single use and a single def that can be folded as
   /// a load, eliminate the register by folding the def into the use.
   bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr *> &Dead);
@@ -207,6 +202,11 @@
     explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
   };
 
+  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+  /// OrigIdx are also available with the same value at UseIdx.
+  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                          SlotIndex UseIdx) const;
+
   /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
   /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
   /// When cheapAsAMove is set, only cheap remats are allowed.
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -201,6 +201,11 @@
     /// Recursively eliminate dead defs in DeadDefs.
     void eliminateDeadDefs();
 
+    /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+    /// OrigIdx are also available with the same value at UseIdx.
+    bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                            SlotIndex UseIdx);
+
     /// LiveRangeEdit callback for eliminateDeadDefs().
     void LRE_WillEraseInstruction(MachineInstr *MI) override;
 
@@ -604,6 +609,14 @@
                 nullptr, this).eliminateDeadDefs(DeadDefs);
 }
 
+bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI,
+                                           SlotIndex OrigIdx,
+                                           SlotIndex UseIdx) {
+  SmallVector<Register, 8> NewRegs;
+  return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
+      .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx);
+}
+
 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
   // MI may be in WorkList. Make sure we don't visit it.
   ErasedInstrs.insert(MI);
@@ -1343,6 +1356,9 @@
     }
   }
 
+  if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx))
+    return false;
+
   DebugLoc DL = CopyMI->getDebugLoc();
   MachineBasicBlock *MBB = CopyMI->getParent();
   MachineBasicBlock::iterator MII =
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -o - -verify-coalescing -run-pass=simple-register-coalescing %s | FileCheck -check-prefix=GCN %s
+
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed.
+
+name: no_remat_killed_src_in_inst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_killed_src_in_inst
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 killed %1, implicit $exec
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed
+# after the MOV but before the COPY.
+
+name: no_remat_killed_src_after_inst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_killed_src_after_inst
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: KILL [[V_ADD_U32_e32_]]
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    KILL %1
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Even if %1 is not killed do not rematerialize V_MOV_B32 so that we do not
+# extend %1 liverange.
+
+name: no_remat_alive_src_in_inst_unused
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_alive_src_in_inst_unused
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Rematerialize V_MOV_B32 since %1 is available at COPY and still alive.
+
+name: remat_alive_src_in_inst_used_and_available
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: remat_alive_src_in_inst_used_and_available
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: $vgpr0 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    $vgpr0 = COPY killed %2, implicit %1
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...