Index: llvm/lib/CodeGen/MachineCopyPropagation.cpp
===================================================================
--- llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -68,6 +68,7 @@
 
 STATISTIC(NumDeletes, "Number of dead copies deleted");
 STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
+STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
 DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
               "Controls which register COPYs are forwarded");
 
@@ -211,11 +212,13 @@
 
   void ReadRegister(unsigned Reg, MachineInstr &Reader, DebugType DT);
   void CopyPropagateBlock(MachineBasicBlock &MBB);
+  bool eraseIfRedundant(MachineInstr &Copy);
   bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
   void forwardUses(MachineInstr &MI);
   bool isForwardableRegClassCopy(const MachineInstr &Copy,
                                  const MachineInstr &UseI, unsigned UseIdx);
   bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+  bool isSafeBackwardCopyPropagation(MachineInstr &Copy, MachineInstr &SrcMI);
 
   /// Candidates for deletion.
   SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
@@ -274,6 +277,102 @@
   return SubIdx == TRI->getSubRegIndex(PreviousDef, Def);
 }
 
+/// Return true if the register defined by \p SrcMI can be renamed to the
+/// destination of \p Copy so that \p Copy becomes redundant.
+///
+/// Operand 0 of \p SrcMI must be a register def of the source of \p Copy.
+/// That operand has to be renamable, must not be tied, hasImplicitOverlap()
+/// must not report a conflict for it, and the register class constraint of
+/// operand 0 must contain the destination register of \p Copy.
+bool MachineCopyPropagation::isSafeBackwardCopyPropagation(
+    MachineInstr &Copy, MachineInstr &SrcMI) {
+  MachineOperand &SrcOp = SrcMI.getOperand(0);
+  if (!SrcOp.isReg() || !SrcOp.isDef() ||
+      SrcOp.getReg() != Copy.getOperand(1).getReg())
+    return false;
+  // A tied def must keep the same register as the use it is tied to, so
+  // renaming only the def would break that constraint. Check it here rather
+  // than relying on callers having filtered such instructions out.
+  if (SrcOp.isTied() || !SrcOp.isRenamable())
+    return false;
+  if (hasImplicitOverlap(SrcMI, SrcOp))
+    return false;
+  // Without a known register class constraint for operand 0 there is nothing
+  // to prove that the new register is legal, so be conservative.
+  const TargetRegisterClass *URC = SrcMI.getRegClassConstraint(0, TII, TRI);
+  return URC && URC->contains(Copy.getOperand(0).getReg());
+}
+
+/// Try to remove \p Copy by making the instruction that defines its source
+/// write the destination register directly:
+///   $reg0 = OP ...                 $reg1 = OP ...
+///   ...                       =>   ...
+///   $reg1 = COPY killed $reg0
+/// Only blocks without successors are considered. Neither register may be
+/// read or redefined between OP and the COPY. Returns true if \p Copy was
+/// erased.
+bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy) {
+  // Only take terminal BBs into account.
+  MachineBasicBlock &MBB = *Copy.getParent();
+  if (!MBB.succ_empty())
+    return false;
+  // Any operand beyond the explicit def and use (e.g. an implicit def of a
+  // super-register) would be lost if the COPY were erased.
+  if (Copy.getNumOperands() != 2)
+    return false;
+  const MachineOperand &CopySrc = Copy.getOperand(1);
+  if (!CopySrc.isRenamable() || !CopySrc.isKill())
+    return false;
+  unsigned Def = Copy.getOperand(0).getReg();
+  unsigned Src = CopySrc.getReg();
+  if (MRI->isReserved(Src) || MRI->isReserved(Def))
+    return false;
+
+  // Walk backwards from the COPY to the closest instruction touching Src or
+  // Def. Debug instructions are skipped so that code generation does not
+  // depend on the presence of debug info; they are fixed up below.
+  MachineInstr *SrcMI = nullptr;
+  SmallVector<MachineInstr *, 4> DbgInstrs;
+  MachineBasicBlock::reverse_iterator It = Copy;
+  for (++It; It != MBB.rend(); ++It) {
+    if (It->isDebugInstr()) {
+      DbgInstrs.push_back(&*It);
+      continue;
+    }
+    if (It->readsRegister(Src, TRI) || It->readsRegister(Def, TRI) ||
+        It->modifiesRegister(Def, TRI))
+      return false;
+    if (It->modifiesRegister(Src, TRI)) {
+      SrcMI = &*It;
+      break;
+    }
+  }
+  if (!SrcMI || !isSafeBackwardCopyPropagation(Copy, *SrcMI))
+    return false;
+
+  // The value now lives in Def from SrcMI onwards. Retarget debug uses of Src
+  // and drop (set to $noreg) any other location that overlaps Src or Def, as
+  // it no longer describes the value it used to.
+  for (MachineInstr *DbgMI : DbgInstrs)
+    for (MachineOperand &MO : DbgMI->operands()) {
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.getReg() == Src)
+        MO.setReg(Def);
+      else if (TRI->regsOverlap(MO.getReg(), Src) ||
+               TRI->regsOverlap(MO.getReg(), Def))
+        MO.setReg(0U);
+    }
+
+  MachineOperand &SrcDefOp = SrcMI->getOperand(0);
+  SrcDefOp.setReg(Def);
+  SrcDefOp.setIsRenamable(Copy.getOperand(0).isRenamable());
+  Copy.eraseFromParent();
+  ++NumCopyBackwardPropagated;
+  ++NumDeletes;
+  return true;
+}
+
 /// Remove instruction \p Copy if there exists a previous copy that copies the
 /// register \p Src to the register \p Def; This may happen indirectly by
 /// copying the super registers.
@@ -475,6 +574,17 @@
              !Register::isVirtualRegister(Src) &&
              "MachineCopyPropagation should be run after register allocation!");
 
+      // In a terminal BB,
+      // $reg0 = OP ...
+      // ... <<< No uses of $reg0 and $reg1, no defs of $reg0 and $reg1
+      // $reg1 = COPY $reg0 <<< $reg0 is killed
+      // =>
+      // $reg1 = OP ...
+      // ...
+      //
+      if (eraseIfRedundant(*MI))
+        continue;
+
       // The two copies cancel out and the source of the first copy
       // hasn't been overridden, eliminate the second one. e.g.
// %ecx = COPY %eax Index: llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll =================================================================== --- llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -26,8 +26,7 @@ ; CHECK-P9-NEXT: cmplwi r3, 2 ; CHECK-P9-NEXT: bge- cr0, .LBB0_6 ; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1 -; CHECK-P9-NEXT: li r5, 0 -; CHECK-P9-NEXT: mr r3, r5 +; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr ; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false ; CHECK-P9-NEXT: cmplwi cr0, r4, 0 Index: llvm/test/CodeGen/X86/mul-i1024.ll =================================================================== --- llvm/test/CodeGen/X86/mul-i1024.ll +++ llvm/test/CodeGen/X86/mul-i1024.ll @@ -354,8 +354,8 @@ ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 4(%ecx), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 4(%eax), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx @@ -764,8 +764,8 @@ ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl 84(%ecx), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl 84(%eax), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx Index: llvm/test/CodeGen/X86/mul-i512.ll =================================================================== --- llvm/test/CodeGen/X86/mul-i512.ll +++ llvm/test/CodeGen/X86/mul-i512.ll @@ -91,8 +91,8 @@ ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X32-NEXT: adcl %edi, %eax ; 
X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%ecx), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: xorl %ebp, %ebp ; X32-NEXT: mull %ebp @@ -1172,9 +1172,8 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl 40(%esi), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 40(%ecx), %ebx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload