diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -26,6 +26,35 @@ Cand.SU->getInstr()->getOpcode() == PPC::ADDI8; } +static void pickTryCand(GenericScheduler::SchedCandidate &Cand, + GenericScheduler::SchedCandidate &TryCand) { + // Update the reason if we have already selected the TryCand. + if (TryCand.Reason != GenericScheduler::NoCand) { + if (TryCand.Reason > GenericScheduler::Stall) + TryCand.Reason = GenericScheduler::Stall; + } else { + // TryCand is not selected by heuristic and Cand is selected. If it is + // NodeOrder, we can still select the TryCand. + if (Cand.Reason == GenericScheduler::NoCand || + Cand.Reason == GenericScheduler::NodeOrder) + TryCand.Reason = GenericScheduler::Stall; + } +} + +static void pickCand(GenericScheduler::SchedCandidate &Cand, + GenericScheduler::SchedCandidate &TryCand) { + if (TryCand.Reason == GenericScheduler::NoCand) { + // TryCand is not selected by heuristic. Update the reason for Cand. + if (Cand.Reason > GenericScheduler::Stall) + Cand.Reason = GenericScheduler::Stall; + } else if (TryCand.Reason == GenericScheduler::NodeOrder) { + // TryCand is selected but it is because of the NodeOrder heuristic. We can + // still select it as Cand. + TryCand.Reason = GenericScheduler::NoCand; + Cand.Reason = GenericScheduler::Stall; + } +} + bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone) const { @@ -35,11 +64,11 @@ SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand; SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand; if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) { - TryCand.Reason = Stall; + pickTryCand(Cand, TryCand); return true; } if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) { - TryCand.Reason = NoCand; + pickCand(Cand, TryCand); return true; } @@ -54,11 +83,6 @@ if (!Cand.isValid() || !Zone) return; - // Add powerpc specific heuristic only when TryCand isn't selected or - // selected as node order. - if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; - // There are some benefits to schedule the ADDI before the load to hide the // latency, as RA may create a true dependency between the load and addi. if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) @@ -71,7 +95,12 @@ return false; if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) { - TryCand.Reason = Stall; + pickTryCand(Cand, TryCand); + return true; + } + + if (isADDIInstr(Cand) && !isADDIInstr(TryCand)) { + pickCand(Cand, TryCand); return true; } return false; @@ -84,11 +113,6 @@ if (!Cand.isValid()) return; - // Add powerpc post ra specific heuristic only when TryCand isn't selected or - // selected as node order. - if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) - return; - // There are some benefits to schedule the ADDI as early as possible post ra // to avoid stalled by vector instructions which take up all the hw units. // And ADDI is usually used to post inc the loop indvar, which matters the diff --git a/llvm/test/CodeGen/PowerPC/botheightreduce.mir b/llvm/test/CodeGen/PowerPC/botheightreduce.mir --- a/llvm/test/CodeGen/PowerPC/botheightreduce.mir +++ b/llvm/test/CodeGen/PowerPC/botheightreduce.mir @@ -26,7 +26,6 @@ ; CHECK: [[LI8_6:%[0-9]+]]:g8rc = LI8 7 ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1 ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 0, [[ADDI8_]] :: (load 8) ; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_]] :: (load 8) ; CHECK: [[LDX1:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_3]] :: (load 8) @@ -34,9 +33,10 @@ ; CHECK: [[LDX2:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_4]] :: (load 8) ; CHECK: [[LDX3:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_5]] :: (load 8) ; CHECK: [[LDX4:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_6]] :: (load 8) - ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8) - ; CHECK: [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]] ; CHECK: [[LD2:%[0-9]+]]:g8rc = LD 8, [[ADDI8_]] :: (load 8) + ; CHECK: [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]] + ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8) + ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1 ; CHECK: [[MULLD1:%[0-9]+]]:g8rc = MULLD [[MULLD]], [[LDX5]] ; CHECK: [[MULLD2:%[0-9]+]]:g8rc = MULLD [[MULLD1]], [[LDX1]] ; CHECK: [[MULLD3:%[0-9]+]]:g8rc = MULLD [[MULLD2]], [[LD1]] diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -116,15 +116,14 @@ ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ldx r9, r6, r7 -; CHECK-NEXT: ld r10, 0(r6) -; CHECK-NEXT: ldx r11, r6, r5 -; CHECK-NEXT: addi r8, r6, 1 -; CHECK-NEXT: ld r6, 4(r6) -; CHECK-NEXT: mulld r9, r10, r9 -; CHECK-NEXT: mulld r9, r9, r11 -; CHECK-NEXT: maddld r3, r9, r6, r3 -; CHECK-NEXT: mr r6, r8 +; CHECK-NEXT: ldx r8, r6, r7 +; CHECK-NEXT: ld r9, 0(r6) +; CHECK-NEXT: ldx r10, r6, r5 +; CHECK-NEXT: ld r11, 4(r6) +; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: mulld r8, r9, r8 +; CHECK-NEXT: mulld r8, r8, r10 +; CHECK-NEXT: maddld r3, r8, r11, r3 ; CHECK-NEXT: bdnz .LBB1_2 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: add r3, r3, r4 @@ -217,25 +216,24 @@ ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: ldx r12, r9, r6 -; CHECK-NEXT: ld r0, 0(r9) -; CHECK-NEXT: ldx r30, r9, r5 -; CHECK-NEXT: ldx r29, r9, r7 -; CHECK-NEXT: addi r11, r9, 1 -; CHECK-NEXT: mulld r12, r0, r12 -; CHECK-NEXT: ld r28, 4(r9) -; CHECK-NEXT: ldx r27, r9, r8 -; CHECK-NEXT: ld r26, 12(r9) -; CHECK-NEXT: ld r25, 8(r9) -; CHECK-NEXT: ldx r9, r9, r10 -; CHECK-NEXT: mulld r12, r12, r30 -; CHECK-NEXT: mulld r12, r12, r29 -; CHECK-NEXT: mulld r12, r12, r28 -; CHECK-NEXT: mulld r12, r12, r27 -; CHECK-NEXT: mulld r12, r12, r26 -; CHECK-NEXT: mulld r12, r12, r25 -; CHECK-NEXT: maddld r3, r12, r9, r3 -; CHECK-NEXT: mr r9, r11 +; CHECK-NEXT: ldx r11, r9, r6 +; CHECK-NEXT: ld r12, 0(r9) +; CHECK-NEXT: ldx r0, r9, r5 +; CHECK-NEXT: ldx r30, r9, r7 +; CHECK-NEXT: mulld r11, r12, r11 +; CHECK-NEXT: ld r29, 4(r9) +; CHECK-NEXT: ldx r28, r9, r8 +; CHECK-NEXT: ld r27, 12(r9) +; CHECK-NEXT: ld r26, 8(r9) +; CHECK-NEXT: ldx r25, r9, r10 +; CHECK-NEXT: addi r9, r9, 1 +; CHECK-NEXT: mulld r11, r11, r0 +; CHECK-NEXT: mulld r11, r11, r30 +; CHECK-NEXT: mulld r11, r11, r29 +; CHECK-NEXT: mulld r11, r11, r28 +; CHECK-NEXT: mulld r11, r11, r27 +; CHECK-NEXT: mulld r11, r11, r26 +; CHECK-NEXT: maddld r3, r11, r25, r3 ; CHECK-NEXT: bdnz .LBB2_2 ; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_3: @@ -624,10 +622,10 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: beq cr0, .LBB6_8 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: addi r6, r3, 4009 -; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) ; CHECK-NEXT: iselgt r8, r4, r7 ; CHECK-NEXT: lis r4, -21846 @@ -639,11 +637,11 @@ ; CHECK-NEXT: li r30, 1 ; CHECK-NEXT: ld r5, 0(r5) ; CHECK-NEXT: mtctr r8 -; CHECK-NEXT: li r8, -9 -; CHECK-NEXT: addi r5, r5, -1 ; CHECK-NEXT: ori r4, r4, 43691 +; CHECK-NEXT: li r8, -9 ; CHECK-NEXT: li r29, 1 ; CHECK-NEXT: li r28, 1 +; CHECK-NEXT: addi r5, r5, -1 ; CHECK-NEXT: b .LBB6_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_2: @@ -652,8 +650,8 @@ ; CHECK-NEXT: ld r0, -8(r6) ; CHECK-NEXT: add r29, r0, r29 ; CHECK-NEXT: .LBB6_3: -; CHECK-NEXT: addi r6, r6, 1 ; CHECK-NEXT: mulld r0, r29, r28 +; CHECK-NEXT: addi r6, r6, 1 ; CHECK-NEXT: mulld r0, r0, r30 ; CHECK-NEXT: mulld r0, r0, r12 ; CHECK-NEXT: mulld r0, r0, r11 @@ -802,8 +800,8 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB7_4 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 @@ -884,8 +882,8 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB8_4 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 diff --git a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll --- a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll +++ b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll @@ -374,9 +374,9 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld 12, 384(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 4, 396(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 376(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 368(1) # 8-byte Folded Reload ; CHECK-NEXT: std 3, 0(12) diff --git a/llvm/test/CodeGen/PowerPC/sched-addi.ll b/llvm/test/CodeGen/PowerPC/sched-addi.ll --- a/llvm/test/CodeGen/PowerPC/sched-addi.ll +++ b/llvm/test/CodeGen/PowerPC/sched-addi.ll @@ -15,8 +15,8 @@ ; CHECK-P9-NEXT: ld 5, 0(5) ; CHECK-P9-NEXT: addis 6, 2, scalars@toc@ha ; CHECK-P9-NEXT: addi 6, 6, scalars@toc@l -; CHECK-P9-NEXT: addi 6, 6, 16 ; CHECK-P9-NEXT: rldicr 5, 5, 0, 58 +; CHECK-P9-NEXT: addi 6, 6, 16 ; CHECK-P9-NEXT: addi 5, 5, -32 ; CHECK-P9-NEXT: lxvdsx 0, 0, 6 ; CHECK-P9-NEXT: rldicl 5, 5, 59, 5 @@ -35,9 +35,9 @@ ; CHECK-P9-NEXT: xvmuldp 1, 1, 0 ; CHECK-P9-NEXT: xvmuldp 4, 4, 0 ; CHECK-P9-NEXT: xvmuldp 3, 3, 0 +; CHECK-P9-NEXT: xvmuldp 6, 6, 0 ; CHECK-P9-NEXT: xvmuldp 5, 5, 0 ; CHECK-P9-NEXT: addi 4, 4, 256 -; CHECK-P9-NEXT: xvmuldp 6, 6, 0 ; CHECK-P9-NEXT: stxv 1, 16(3) ; CHECK-P9-NEXT: stxv 2, 0(3) ; CHECK-P9-NEXT: stxv 3, 48(3) diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -22,22 +22,22 @@ ; CHECK-NEXT: isellt 3, 3, 4 ; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: lbz 5, 0(5) +; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: bdz .LBB0_6 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: addi 8, 7, -1 ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: clrldi 5, 7, 32 +; CHECK-NEXT: addi 3, 3, 1 +; CHECK-NEXT: addi 8, 7, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_5 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: cntlzw 6, 6 +; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: srwi 7, 6, 5 ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: clrldi 5, 8, 32 @@ -45,12 +45,12 @@ ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_4 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: clrldi 10, 8, 32 -; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: cntlzw 9, 6 ; CHECK-NEXT: xori 6, 5, 84 +; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: lbz 5, 0(10) +; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: add 4, 4, 7 ; CHECK-NEXT: srwi 7, 9, 5 ; CHECK-NEXT: bdnz .LBB0_3 diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll --- a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll @@ -14,17 +14,17 @@ ; CHECK-NEXT: mr 30, 3 ; CHECK-NEXT: bl calloc ; CHECK-NEXT: nop +; CHECK-NEXT: clrldi 4, 30, 32 ; CHECK-NEXT: li 5, 0 ; CHECK-NEXT: addi 3, 3, -4 -; CHECK-NEXT: li 6, 1 -; CHECK-NEXT: clrldi 4, 30, 32 ; CHECK-NEXT: mtctr 4 ; CHECK-NEXT: mullw 4, 5, 5 +; CHECK-NEXT: li 6, 1 ; CHECK-NEXT: bdz .LBB0_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi 5, 6, 1 ; CHECK-NEXT: stwu 4, 4(3) ; CHECK-NEXT: mullw 4, 6, 6 +; CHECK-NEXT: addi 5, 6, 1 ; CHECK-NEXT: bdz .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: diff --git a/llvm/test/CodeGen/PowerPC/sms-simple.ll b/llvm/test/CodeGen/PowerPC/sms-simple.ll --- a/llvm/test/CodeGen/PowerPC/sms-simple.ll +++ b/llvm/test/CodeGen/PowerPC/sms-simple.ll @@ -9,15 +9,15 @@ define dso_local i32* @foo() local_unnamed_addr { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis r5, r2, x@toc@ha -; CHECK-NEXT: addis r6, r2, y@toc@ha +; CHECK-NEXT: addis r5, r2, y@toc@ha ; CHECK-NEXT: li r7, 340 -; CHECK-NEXT: addi r5, r5, x@toc@l -; CHECK-NEXT: addi r5, r5, -8 -; CHECK-NEXT: addi r3, r6, y@toc@l -; CHECK-NEXT: lwz r6, y@toc@l(r6) +; CHECK-NEXT: addi r3, r5, y@toc@l +; CHECK-NEXT: lwz r6, y@toc@l(r5) +; CHECK-NEXT: addis r5, r2, x@toc@ha ; CHECK-NEXT: mtctr r7 +; CHECK-NEXT: addi r5, r5, x@toc@l ; CHECK-NEXT: addi r4, r3, -8 +; CHECK-NEXT: addi r5, r5, -8 ; CHECK-NEXT: lwzu r7, 12(r5) ; CHECK-NEXT: maddld r6, r7, r7, r6 ; CHECK-NEXT: lwz r7, 4(r5) diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll @@ -50,9 +50,9 @@ ; CHECK-P9-LE-NEXT: std r31, -8(r1) ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30 -; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: li r6, -32768 ; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r5, r3 @@ -189,9 +189,9 @@ ; CHECK-P9-LE-NEXT: std r31, -8(r1) ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r4, r3, 2, 30 -; CHECK-P9-LE-NEXT: addi r4, r4, 15 ; CHECK-P9-LE-NEXT: li r7, -4096 ; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r4, r4, 15 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r4 @@ -333,10 +333,10 @@ ; CHECK-P9-LE-NEXT: std r31, -8(r1) ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30 -; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: lis r5, -1 -; CHECK-P9-LE-NEXT: ori r5, r5, 0 ; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r3, r3, 15 +; CHECK-P9-LE-NEXT: ori r5, r5, 0 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r3