diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -596,9 +596,57 @@ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - // If SuccToSinkTo is final destination and it is a post dominator of current - // block then it is not profitable to sink MI into SuccToSinkTo block. - return false; + MachineLoop *ML = LI->getLoopFor(MBB); + + // If the instruction is not inside a loop, it is not profitable to sink MI to + // a post-dominating block SuccToSinkTo. + if (!ML) + return false; + + // If this instruction is inside a loop and sinking this instruction can make + // more registers' live ranges shorter, it is still profitable. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + // Ignore non-register operands. + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Don't handle physical register. + if (Register::isPhysicalRegister(Reg)) + return false; + + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + RegClassWeight W = TRI->getRegClassWeight(RC); + // Users for the defs are all dominated by SuccToSinkTo. + if (MO.isDef()) { + // This def register's live range is shortened after sinking. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, + LocalUse)) + return false; + } else { + MachineInstr *DefMI = MRI->getVRegDef(Reg); + // DefMI is defined outside of loop. There should be no live range + // impact for this operand. Definition outside of loop means: + // 1: definition is outside of loop. + // 2: definition is in this loop, but it is a PHI in the loop header. + if (LI->getLoopFor(DefMI->getParent()) != ML || + (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + continue; + // DefMI is inside the loop. 
Mark it as not profitable as sinking MI will + // enlarge DefMI live range. + // FIXME: check the register pressure in block SuccToSinkTo, if it is + // smaller than the limit after sinking, it is still profitable to sink. + return false; + } + } + + // If MI is in loop and all its operands are alive across the whole loop, it + // is profitable to sink MI. + return true; } /// Get the sorted sequence of successors for this MachineBasicBlock, possibly diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir @@ -370,7 +370,6 @@ ; CHECK: [[PHI5:%[0-9]+]]:gprc = PHI [[LI2]], %bb.2, %27, %bb.17 ; CHECK: [[PHI6:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.2, %55, %bb.17 ; CHECK: [[PHI7:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.2, %15, %bb.17 - ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8 ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2) ; CHECK: [[COPY10:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI4]].sub_32 ; CHECK: [[MULHWU1:%[0-9]+]]:gprc = MULHWU [[COPY10]], [[ORI]] @@ -396,6 +395,7 @@ ; CHECK: bb.12 (%ir-block.60): ; CHECK: successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555) ; CHECK: [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10 + ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8 ; CHECK: [[COPY13:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]] ; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]] ; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2) diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll +++ 
b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll @@ -9,41 +9,40 @@ ; CHECK-NEXT: blt cr0, .LBB0_8 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addi r4, r5, -4 -; CHECK-NEXT: addi r8, r6, -4 -; CHECK-NEXT: clrldi r7, r7, 32 +; CHECK-NEXT: clrldi r8, r7, 32 +; CHECK-NEXT: addi r7, r6, -4 ; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: mtctr r7 -; CHECK-NEXT: lis r7, -30584 +; CHECK-NEXT: mtctr r8 +; CHECK-NEXT: lis r8, -30584 ; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: cmplwi r3, 3 ; CHECK-NEXT: cmplwi cr1, r3, 1 -; CHECK-NEXT: ori r7, r7, 34953 +; CHECK-NEXT: ori r8, r8, 34953 ; CHECK-NEXT: b .LBB0_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: mulhwu r9, r6, r7 -; CHECK-NEXT: srwi r9, r9, 4 -; CHECK-NEXT: mulli r9, r9, 30 -; CHECK-NEXT: sub r9, r6, r9 +; CHECK-NEXT: mulhwu r3, r6, r8 +; CHECK-NEXT: srwi r3, r3, 4 +; CHECK-NEXT: mulli r3, r3, 30 +; CHECK-NEXT: sub r3, r6, r3 ; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: addi r6, r6, 1 -; CHECK-NEXT: add r9, r9, r5 -; CHECK-NEXT: stw r9, 4(r8) -; CHECK-NEXT: mr r8, r3 +; CHECK-NEXT: add r3, r3, r5 +; CHECK-NEXT: stw r3, 4(r7) +; CHECK-NEXT: addi r7, r7, 4 ; CHECK-NEXT: bdz .LBB0_8 ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: lwzu r9, 4(r4) -; CHECK-NEXT: addi r3, r8, 4 -; CHECK-NEXT: add r5, r9, r5 +; CHECK-NEXT: lwzu r3, 4(r4) +; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: beq cr0, .LBB0_7 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: bne cr1, .LBB0_2 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: slwi r9, r6, 1 +; CHECK-NEXT: slwi r3, r6, 1 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_7: -; CHECK-NEXT: addi r9, r6, 100 +; CHECK-NEXT: addi r3, r6, 100 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_8: ; CHECK-NEXT: li r3, 0