diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -601,6 +601,49 @@ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); + // If this instruction is inside a loop and sinking this instruction can make + // more registers live range shorten, it is still profitable. + MachineLoop *ML = LI->getLoopFor(MBB); + int LiveRangeShortenCount = 0; + if (ML) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + // Ignore non-register operands. + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + // Don't handle physical register. + if (Reg == 0 || Register::isPhysicalRegister(Reg)) + return false; + + // Users for the defs are all dominated by SuccToSinkTo. + if (MO.isDef()) { + // This def register's live range is shortened after sinking. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, + LocalUse)) + return false; + LiveRangeShortenCount++; + } else { + MachineInstr *DefMI = MRI->getVRegDef(Reg); + // DefMI is defined outside of loop. There should be no live range + // impact for this operand. Definition outside of loop means: + // 1: definition is outside of loop. + // 2: definition is in this loop, but it is a PHI in the loop header. + if (LI->getLoopFor(DefMI->getParent()) != ML || + (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + continue; + // DefMI is inside the loop. + LiveRangeShortenCount--; + } + } + // If there is no live range shorten number degradation, we treat it as + // profitable. Because after sinking all users of one definition inside the + // loop, we can also sink the definition later. 
+ if (LiveRangeShortenCount >= 0) + return true; + } + // If SuccToSinkTo is final destination and it is a post dominator of current // block then it is not profitable to sink MI into SuccToSinkTo block. return false; diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll --- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll +++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll @@ -50,7 +50,7 @@ ; CHECK-NEXT: movslq (%r9), %rsi ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movb $1, %r10b +; CHECK-NEXT: movb $1, %r9b ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_9: # %b1606 @@ -87,7 +87,7 @@ ; CHECK-NEXT: je .LBB0_37 ; CHECK-NEXT: .LBB0_18: # %b188 ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testb %r10b, %r10b +; CHECK-NEXT: testb %r9b, %r9b ; CHECK-NEXT: jne .LBB0_4 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_19: # %a30b294 @@ -97,27 +97,23 @@ ; CHECK-NEXT: je .LBB0_19 ; CHECK-NEXT: .LBB0_4: # %a33b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: orl %r8d, %eax -; CHECK-NEXT: movl %eax, %r9d -; CHECK-NEXT: shrl $31, %r9d -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movl %esi, %r10d +; CHECK-NEXT: orl %r8d, %r10d ; CHECK-NEXT: jns .LBB0_20 ; CHECK-NEXT: .LBB0_5: # %a50b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: shrl $31, %r10d ; CHECK-NEXT: movl %r8d, %eax ; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: movl %eax, %r11d -; CHECK-NEXT: shrl $31, %r11d -; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jns .LBB0_26 ; CHECK-NEXT: .LBB0_6: # %a57b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testb %r9b, %r9b +; CHECK-NEXT: shrl $31, %eax +; CHECK-NEXT: testb %r10b, %r10b ; CHECK-NEXT: je .LBB0_30 ; CHECK-NEXT: .LBB0_7: # %a66b ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: testb %r11b, %r11b +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: 
jne .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_34: # %a74b @@ -187,7 +183,7 @@ ; CHECK-NEXT: je .LBB0_38 ; CHECK-NEXT: .LBB0_27: # %b879 ; CHECK-NEXT: # in Loop: Header=BB0_26 Depth=2 -; CHECK-NEXT: testb %r10b, %r10b +; CHECK-NEXT: testb %r9b, %r9b ; CHECK-NEXT: jne .LBB0_28 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_29: # %a53b1019