diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -596,9 +596,55 @@ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); - // If SuccToSinkTo is final destination and it is a post dominator of current - // block then it is not profitable to sink MI into SuccToSinkTo block. - return false; + MachineLoop *ML = LI->getLoopFor(MBB); + + // If the instruction is not inside a loop, it is not profitable to sink MI to + // a post-dominating block SuccToSinkTo. + if (!ML) + return false; + + // If this instruction is inside a loop and sinking this instruction can make + // more registers' live ranges shorter, it is still profitable. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + // Ignore non-register operands. + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Don't handle physical register. + if (Register::isPhysicalRegister(Reg)) + return false; + + // Users for the defs are all dominated by SuccToSinkTo. + if (MO.isDef()) { + // This def register's live range is shortened after sinking. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, + LocalUse)) + return false; + } else { + MachineInstr *DefMI = MRI->getVRegDef(Reg); + // DefMI is defined outside of loop. There should be no live range + // impact for this operand. Definition outside of loop means: + // 1: definition is outside of loop. + // 2: definition is in this loop, but it is a PHI in the loop header. + if (LI->getLoopFor(DefMI->getParent()) != ML || + (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) + continue; + // DefMI is inside the loop. Mark it as not profitable as sinking MI will + // enlarge DefMI live range. 
+ // FIXME: check the register pressure in block SuccToSinkTo, if it is + // smaller than the limit after sinking, it is still profitable to sink. + return false; + } + } + + // If MI is in loop and all its operands are alive across the whole loop, it + // is profitable to sink MI. + return true; } /// Get the sorted sequence of successors for this MachineBasicBlock, possibly diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir @@ -370,7 +370,6 @@ ; CHECK: [[PHI5:%[0-9]+]]:gprc = PHI [[LI2]], %bb.2, %27, %bb.17 ; CHECK: [[PHI6:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.2, %55, %bb.17 ; CHECK: [[PHI7:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.2, %15, %bb.17 - ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8 ; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2) ; CHECK: [[COPY10:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI4]].sub_32 ; CHECK: [[MULHWU1:%[0-9]+]]:gprc = MULHWU [[COPY10]], [[ORI]] @@ -396,6 +395,7 @@ ; CHECK: bb.12 (%ir-block.60): ; CHECK: successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555) ; CHECK: [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10 + ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8 ; CHECK: [[COPY13:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]] ; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]] ; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2) diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll deleted file mode 100644 --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll +++ /dev/null @@ -1,97 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s - -define signext i32 @foo(i32 signext %0, i32 signext %1, i32* %2, i32* %3, i32 signext %4) { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: cmpwi r7, 1 -; CHECK-NEXT: blt cr0, .LBB0_8 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi r4, r5, -4 -; CHECK-NEXT: addi r8, r6, -4 -; CHECK-NEXT: clrldi r7, r7, 32 -; CHECK-NEXT: li r5, 0 -; CHECK-NEXT: mtctr r7 -; CHECK-NEXT: lis r7, -30584 -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: cmplwi r3, 3 -; CHECK-NEXT: cmplwi cr1, r3, 1 -; CHECK-NEXT: ori r7, r7, 34953 -; CHECK-NEXT: b .LBB0_4 -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: mulhwu r9, r6, r7 -; CHECK-NEXT: srwi r9, r9, 4 -; CHECK-NEXT: mulli r9, r9, 30 -; CHECK-NEXT: sub r9, r6, r9 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: addi r6, r6, 1 -; CHECK-NEXT: add r9, r9, r5 -; CHECK-NEXT: stw r9, 4(r8) -; CHECK-NEXT: mr r8, r3 -; CHECK-NEXT: bdz .LBB0_8 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: lwzu r9, 4(r4) -; CHECK-NEXT: addi r3, r8, 4 -; CHECK-NEXT: add r5, r9, r5 -; CHECK-NEXT: beq cr0, .LBB0_7 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: bne cr1, .LBB0_2 -; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: slwi r9, r6, 1 -; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_7: -; CHECK-NEXT: addi r9, r6, 100 -; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_8: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr - %6 = icmp sgt i32 %4, 0 - br i1 %6, label %7, label %9 - -7: ; preds = %5 - %8 = zext i32 %4 to i64 - br label %10 - -9: ; preds = %25, %5 - ret i32 undef - -10: ; preds = %7, %25 - %11 = phi i64 [ 0, %7 ], [ %29, %25 ] - %12 = phi i32 [ 0, %7 ], [ %30, %25 ] - %13 = phi i32 [ 0, %7 ], [ %16, %25 ] - %14 = getelementptr inbounds i32, i32* %2, i64 %11 - %15 = load i32, i32* %14, align 4 - %16 = add nsw i32 %15, %13 - switch i32 %0, label %22 [ - i32 1, label %17 - i32 3, label %20 - ] - -17: ; 
preds = %10 - %18 = trunc i64 %11 to i32 - %19 = shl i32 %18, 1 - br label %25 - -20: ; preds = %10 - %21 = add nuw nsw i32 %12, 100 - br label %25 - -22: ; preds = %10 - %23 = trunc i64 %11 to i32 - %24 = urem i32 %23, 30 - br label %25 - -25: ; preds = %22, %20, %17 - %26 = phi i32 [ %24, %22 ], [ %21, %20 ], [ %19, %17 ] - %27 = add nsw i32 %26, %16 - %28 = getelementptr inbounds i32, i32* %3, i64 %11 - store i32 %27, i32* %28, align 4 - %29 = add nuw nsw i64 %11, 1 - %30 = add nuw nsw i32 %12, 1 - %31 = icmp eq i64 %29, %8 - br i1 %31, label %9, label %10 -} -