Index: llvm/lib/CodeGen/MachineCopyPropagation.cpp =================================================================== --- llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -37,6 +37,15 @@ // ... // No clobber of %R0 // %R1 = COPY %R0 <<< Removed // +// or +// +// $R0 = OP ... +// ... // No read/clobber of $R0 and $R1 +// $R1 = COPY $R0 // $R0 is killed +// Replace $R0 with $R1 and remove the COPY +// $R1 = OP ... +// ... +// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -68,6 +77,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted"); STATISTIC(NumCopyForwards, "Number of copy uses forwarded"); +STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated"); DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", "Controls which register COPYs are forwarded"); @@ -211,11 +221,13 @@ void ReadRegister(unsigned Reg, MachineInstr &Reader, DebugType DT); void CopyPropagateBlock(MachineBasicBlock &MBB); + bool eraseIfRedundant(MachineInstr &Copy); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); bool isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); + bool isSafeBackwardCopyPropagation(MachineInstr &Copy, MachineInstr &SrcMI); /// Candidates for deletion. SmallSetVector MaybeDeadCopies; @@ -274,6 +286,57 @@ return SubIdx == TRI->getSubRegIndex(PreviousDef, Def); } +bool MachineCopyPropagation::isSafeBackwardCopyPropagation( + MachineInstr &Copy, MachineInstr &SrcMI) { + MachineOperand &SrcOp = SrcMI.getOperand(0); + if (!(SrcOp.isReg() && SrcOp.isDef() && + SrcOp.getReg() == Copy.getOperand(1).getReg() && SrcOp.isRenamable() && + !SrcOp.isTied() && !SrcOp.isImplicit() && + !MRI->isReserved(SrcOp.getReg()))) + return false; + if (const TargetRegisterClass *URC = SrcMI.getRegClassConstraint(0, TII, TRI)) + return URC->contains(Copy.getOperand(0).getReg()); + // FIXME: Since COPYs don't have register class constraints, we might miss + // some opportunities if SrcMI is a COPY. + return false; +} + +/// Remove instruction \p Copy if \p Copy's src and dst are not used or defined +/// between \p Copy and definition instruction of \p Copy's src. \p Copy's dst +/// will be backward propagated to where \p Copy's src is defined. +bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy) { + // Don't handle non-trivial copies. + if (Copy.getNumOperands() != 2 || !Copy.getOperand(1).isRenamable() || + !Copy.getOperand(1).isKill()) + return false; + unsigned Def = Copy.getOperand(0).getReg(); + unsigned Src = Copy.getOperand(1).getReg(); + if (MRI->isReserved(Src) || MRI->isReserved(Def)) + return false; + MachineBasicBlock::reverse_iterator E = Copy.getParent()->rend(), It = Copy; + It++; + MachineInstr *SrcMI = nullptr; + for (; It != E; ++It) { + if (It->readsRegister(Src, TRI) || It->readsRegister(Def, TRI)) + return false; + if (It->modifiesRegister(Def, TRI)) + return false; + if (It->modifiesRegister(Src, TRI)) { + SrcMI = &*It; + break; + } + } + if (!SrcMI || !isSafeBackwardCopyPropagation(Copy, *SrcMI)) + return false; + SrcMI->getOperand(0).setReg(Def); + SrcMI->getOperand(0).setIsRenamable(Copy.getOperand(0).isRenamable()); + Tracker.clobberRegister(Def, *TRI); + Copy.eraseFromParent(); + ++NumCopyBackwardPropagated; + ++NumDeletes; + return true; +} + /// Remove instruction \p Copy if there exists a previous copy that copies the /// register \p Src to the register \p Def; This may happen indirectly by /// copying the super registers. @@ -475,6 +538,15 @@ !Register::isVirtualRegister(Src) && "MachineCopyPropagation should be run after register allocation!"); + // $reg0 = OP ... + // ... <<< No read/clobber of $reg0 and $reg1 + // $reg1 = COPY $reg0 <<< $reg0 is killed + // => + // $reg1 = OP ... + // ... + if (eraseIfRedundant(*MI)) + continue; + // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. // %ecx = COPY %eax Index: llvm/test/CodeGen/PowerPC/machine-backward-cp.mir =================================================================== --- llvm/test/CodeGen/PowerPC/machine-backward-cp.mir +++ llvm/test/CodeGen/PowerPC/machine-backward-cp.mir @@ -10,8 +10,7 @@ body: | bb.0.entry: ; CHECK-LABEL: name: test0 - ; CHECK: renamable $x4 = LI8 1024 - ; CHECK: $x3 = COPY killed renamable $x4 + ; CHECK: $x3 = LI8 1024 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x4 = LI8 1024 $x3 = COPY renamable killed $x4 @@ -27,8 +26,7 @@ body: | ; CHECK-LABEL: name: test1 ; CHECK: bb.0.entry: - ; CHECK: renamable $x5 = LI8 42 - ; CHECK: renamable $x4 = COPY killed renamable $x5 + ; CHECK: renamable $x4 = LI8 42 ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: liveins: $x4 @@ -138,8 +136,7 @@ ; CHECK-LABEL: name: iterative_deletion ; CHECK: liveins: $x5 - ; CHECK: renamable $x6 = ADDI8 killed renamable $x5, 1 - ; CHECK: $x3 = COPY $x6 + ; CHECK: $x3 = ADDI8 killed renamable $x5, 1 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x6 = ADDI8 renamable killed $x5, 1 renamable $x4 = COPY renamable killed $x6 @@ -159,8 +156,8 @@ ; CHECK-LABEL: name: Enter ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 killed renamable $x4, 1 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x6 + ; CHECK: renamable $x7 = ADDI8 killed renamable $x4, 1 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 renamable $x6 = ADDI8 killed renamable $x4, 1 @@ -180,10 +177,9 @@ ; CHECK-LABEL: name: foo ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1 - ; CHECK: renamable $x7 = COPY killed renamable $x6 - ; CHECK: renamable $x8 = ADDI8 killed $x4, 2 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x8 + ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1 + ; CHECK: renamable $x6 = ADDI8 killed $x4, 2 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x6 ; CHECK: $x3 = ADD8 $x3, killed renamable $x7 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -207,10 +203,9 @@ ; CHECK-LABEL: name: bar ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1 - ; CHECK: renamable $x8 = COPY $x6 - ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x6 + ; CHECK: renamable $x8 = ADDI8 renamable $x4, 1 + ; CHECK: renamable $x7 = ADDI8 renamable $x5, 2 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7 ; CHECK: $x3 = ADD8 $x3, killed renamable $x8 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -235,10 +230,9 @@ ; CHECK-LABEL: name: bogus ; CHECK: liveins: $x7 ; CHECK: renamable $x5 = COPY renamable $x7 - ; CHECK: renamable $x6 = ADDI8 $x7, 1 - ; CHECK: renamable $x7 = COPY $x6 + ; CHECK: renamable $x4 = ADDI8 $x7, 1 ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2 - ; CHECK: $x3 = ADD8 $x7, killed renamable $x5 + ; CHECK: $x3 = ADD8 killed renamable $x4, killed renamable $x5 ; CHECK: $x3 = ADD8 $x3, killed renamable $x6 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -262,10 +256,9 @@ liveins: $x7 ; CHECK-LABEL: name: foobar ; CHECK: liveins: $x7 - ; CHECK: renamable $x6 = ADDI8 $x7, 1 - ; CHECK: renamable $x8 = COPY $x6 - ; CHECK: renamable $x6 = ADDI8 $x7, 2 - ; CHECK: $x3 = ADD8 $x6, $x7 + ; CHECK: renamable $x8 = ADDI8 $x7, 1 + ; CHECK: renamable $x4 = ADDI8 $x7, 2 + ; CHECK: $x3 = ADD8 killed renamable $x4, $x7 ; CHECK: $x3 = ADD8 $x3, killed renamable $x8 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -279,3 +272,59 @@ BLR8 implicit $lr8, implicit undef $rm, implicit $x3 ... + +--- +name: copy_with_side_effect +alignment: 4 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $cr0 + + ; CHECK-LABEL: name: copy_with_side_effect + ; CHECK: liveins: $x0, $cr0 + ; CHECK: renamable $x4 = LI8 1024 + ; CHECK: $x3 = COPY killed renamable $x4, implicit $x0, implicit-def $cr0 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + renamable $x4 = LI8 1024 + $x3 = COPY renamable killed $x4, implicit $x0, implicit-def $cr0 + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + +... + +# FIXME: Need to enhance COPY's register constraint. +--- +name: transitive_copies +alignment: 4 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: transitive_copies + ; CHECK: bb.0.entry: + ; CHECK: liveins: $x3 + ; CHECK: renamable $x4 = COPY renamable $x3 + ; CHECK: renamable $x5 = COPY $x3 + ; CHECK: renamable $x6 = COPY $x3 + ; CHECK: renamable $x7 = COPY $x3 + ; CHECK: renamable $x8 = ADDI8 $x3, 1 + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: liveins: $x8 + ; CHECK: $x3 = COPY killed renamable $x8 + ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit $x3 + bb.0.entry: + liveins: $x3 + + renamable $x4 = COPY renamable killed $x3 + renamable $x5 = COPY renamable killed $x4 + renamable $x6 = COPY renamable killed $x5 + renamable $x7 = COPY renamable killed $x6 + renamable $x8 = ADDI8 renamable killed $x7, 1 + B %bb.1 + + bb.1: + liveins: $x8 + + $x3 = COPY renamable killed $x8 + BLR8 implicit $lr8, implicit $rm, implicit $x3 + +... Index: llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll =================================================================== --- llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -26,8 +26,7 @@ ; CHECK-P9-NEXT: cmplwi r3, 2 ; CHECK-P9-NEXT: bge- cr0, .LBB0_6 ; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1 -; CHECK-P9-NEXT: li r5, 0 -; CHECK-P9-NEXT: mr r3, r5 +; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr ; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false ; CHECK-P9-NEXT: cmplwi cr0, r4, 0 Index: llvm/test/CodeGen/X86/fp128-i128.ll =================================================================== --- llvm/test/CodeGen/X86/fp128-i128.ll +++ llvm/test/CodeGen/X86/fp128-i128.ll @@ -508,9 +508,8 @@ ; AVX-NEXT: testl %ebp, %ebp ; AVX-NEXT: jle .LBB10_1 ; AVX-NEXT: # %bb.2: # %if.then -; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm2 ; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX-NEXT: vmovaps %xmm1, %xmm2 ; AVX-NEXT: jmp .LBB10_3 ; AVX-NEXT: .LBB10_1: ; AVX-NEXT: vmovaps (%rsp), %xmm2 # 16-byte Reload Index: llvm/test/CodeGen/X86/fshr.ll =================================================================== --- llvm/test/CodeGen/X86/fshr.ll +++ llvm/test/CodeGen/X86/fshr.ll @@ -279,8 +279,7 @@ ; X86-SLOW-NEXT: orl %edi, %edx ; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: .LBB4_2: -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: movl %ebx, %ecx ; X86-SLOW-NEXT: shrl %cl, %edx ; X86-SLOW-NEXT: movb %bl, %ah Index: llvm/test/CodeGen/X86/i128-mul.ll =================================================================== --- llvm/test/CodeGen/X86/i128-mul.ll +++ llvm/test/CodeGen/X86/i128-mul.ll @@ -88,9 +88,8 @@ ; X86-NEXT: movl 4(%eax,%ebp,8), %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: mull %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %eax Index: llvm/test/CodeGen/X86/mul-i512.ll =================================================================== --- llvm/test/CodeGen/X86/mul-i512.ll +++ llvm/test/CodeGen/X86/mul-i512.ll @@ -153,9 +153,8 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl 8(%esi), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 8(%ecx), %ebx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload Index: llvm/test/CodeGen/X86/shift-i128.ll =================================================================== --- llvm/test/CodeGen/X86/shift-i128.ll +++ llvm/test/CodeGen/X86/shift-i128.ll @@ -296,8 +296,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll %cl, %ebx ; X86-NEXT: movl %ebp, %esi @@ -534,8 +533,7 @@ ; X86-NEXT: .LBB6_9: # %entry ; X86-NEXT: movl %edi, %esi ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: shrl %cl, %ebp ; X86-NEXT: testb $32, %cl @@ -795,9 +793,8 @@ ; X86-NEXT: # %bb.4: # %entry ; X86-NEXT: movl %edi, %ebx ; X86-NEXT: .LBB7_5: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %edi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: sarl %cl, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -835,8 +832,7 @@ ; X86-NEXT: movl %esi, %edi ; X86-NEXT: .LBB7_9: # %entry ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sarl %cl, %esi ; X86-NEXT: testb $32, %cl @@ -850,8 +846,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movb $64, %cl ; X86-NEXT: subb %dl, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: shldl %cl, %ebx, %ebp ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1059,12 +1054,11 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: subl $72, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: shll %cl, %ebp -; X86-NEXT: movl %eax, %esi ; X86-NEXT: shll %cl, %esi ; X86-NEXT: movl %edx, %eax ; X86-NEXT: subl $64, %eax @@ -1130,9 +1124,7 @@ ; X86-NEXT: movl %ecx, %ebp ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: shll %cl, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %dl ; X86-NEXT: movl $0, %edi @@ -1210,8 +1202,7 @@ ; X86-NEXT: movl %edi, %ecx ; X86-NEXT: .LBB8_23: # %entry ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll %cl, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill Index: llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -98,8 +98,8 @@ ; X86-NEXT: addl %esi, %ecx ; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill