Index: llvm/trunk/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineScheduler.cpp +++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp @@ -1837,9 +1837,15 @@ /// Given a Count of resource usage and a Latency value, return true if a /// SchedBoundary becomes resource limited. +/// If we are checking after scheduling a node, we should return true when +/// we just reach the resource limit. static bool checkResourceLimit(unsigned LFactor, unsigned Count, - unsigned Latency) { - return (int)(Count - (Latency * LFactor)) > (int)LFactor; + unsigned Latency, bool AfterSchedNode) { + int ResCntFactor = (int)(Count - (Latency * LFactor)); + if (AfterSchedNode) + return ResCntFactor >= (int)LFactor; + else + return ResCntFactor > (int)LFactor; } void SchedBoundary::reset() { @@ -2134,7 +2140,7 @@ CheckPending = true; IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), - getScheduledLatency()); + getScheduledLatency(), true); LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); @@ -2302,7 +2308,7 @@ // resource limited. If a stall occurred, bumpCycle does this. IsResourceLimited = checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), - getScheduledLatency()); + getScheduledLatency(), true); // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle // resets CurrMOps. Loop to handle instructions with more MOps than issue in @@ -2521,7 +2527,7 @@ RemLatency = computeRemLatency(CurrZone); RemLatencyComputed = true; OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), - OtherCount, RemLatency); + OtherCount, RemLatency, false); } // Schedule aggressively for latency in PostRA mode. We don't check for Index: llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll +++ llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll @@ -2024,8 +2024,8 @@ ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 24(r3) ; P9LE-NEXT: lfd f2, 8(r3) -; P9LE-NEXT: lfd f1, 16(r3) ; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: lfd f1, 16(r3) ; P9LE-NEXT: lfd f3, 0(r3) ; P9LE-NEXT: xvcvdpsxws v2, vs0 ; P9LE-NEXT: xxmrghd vs0, vs3, vs1 @@ -3608,8 +3608,8 @@ ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: lfd f0, 24(r3) ; P9LE-NEXT: lfd f2, 8(r3) -; P9LE-NEXT: lfd f1, 16(r3) ; P9LE-NEXT: xxmrghd vs0, vs2, vs0 +; P9LE-NEXT: lfd f1, 16(r3) ; P9LE-NEXT: lfd f3, 0(r3) ; P9LE-NEXT: xvcvdpuxws v2, vs0 ; P9LE-NEXT: xxmrghd vs0, vs3, vs1 Index: llvm/trunk/test/CodeGen/PowerPC/csr-save-restore-order.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/csr-save-restore-order.ll +++ llvm/trunk/test/CodeGen/PowerPC/csr-save-restore-order.ll @@ -57,9 +57,9 @@ ; CHECK-PWR9-NEXT: .cfi_offset v31, -304 ; CHECK-PWR9-NEXT: std r14, 240(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: std r15, 248(r1) # 8-byte Folded Spill -; CHECK-PWR9-NEXT: std r16, 256(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: stxv v20, 48(r1) # 16-byte Folded Spill ; CHECK-PWR9-NEXT: stxv v21, 64(r1) # 16-byte Folded Spill +; CHECK-PWR9-NEXT: std r16, 256(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: stxv v22, 80(r1) # 16-byte Folded Spill ; CHECK-PWR9-NEXT: std r17, 264(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: stxv v23, 96(r1) # 16-byte Folded Spill @@ -112,8 +112,8 @@ ; CHECK-PWR9-NEXT: ld r4, 32(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: add r3, r4, r3 +; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v27, 160(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v26, 144(r1) # 16-byte Folded Reload @@ -128,11 +128,11 @@ ; CHECK-PWR9-NEXT: lfd f29, 504(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: lfd f28, 496(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: lfd f27, 488(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r31, 376(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r30, 368(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r29, 360(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r28, 352(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r27, 344(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r26, 336(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r25, 328(r1) # 8-byte Folded Reload Index: llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll +++ llvm/trunk/test/CodeGen/PowerPC/f128-passByValue.ll @@ -85,6 +85,7 @@ ; CHECK-LABEL: maxVecParam: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsaddqp v2, v2, v3 +; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1) ; CHECK-NEXT: xsaddqp v2, v2, v4 ; CHECK-NEXT: xsaddqp v2, v2, v5 ; CHECK-NEXT: xsaddqp v2, v2, v6 @@ -95,7 +96,6 @@ ; CHECK-NEXT: xsaddqp v2, v2, v11 ; CHECK-NEXT: xsaddqp v2, v2, v12 ; CHECK-NEXT: xsaddqp v2, v2, v13 -; CHECK-NEXT: lxv v[[REG0:[0-9]+]], 224(r1) ; CHECK-NEXT: xssubqp v2, v2, v[[REG0]] ; CHECK-NEXT: blr fp128 %p6, fp128 %p7, fp128 %p8, fp128 %p9, fp128 %p10, Index: llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll +++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_fp_to_i_8byte_elts.ll @@ -167,15 +167,15 @@ ; CHECK-P9-NEXT: lxv vs1, 96(r4) ; CHECK-P9-NEXT: lxv vs2, 80(r4) ; CHECK-P9-NEXT: lxv vs3, 64(r4) +; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 ; CHECK-P9-NEXT: lxv vs4, 48(r4) -; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 ; CHECK-P9-NEXT: lxv vs5, 32(r4) ; CHECK-P9-NEXT: lxv vs6, 16(r4) ; CHECK-P9-NEXT: lxv vs7, 0(r4) ; CHECK-P9-NEXT: xvcvdpuxds vs7, vs7 ; CHECK-P9-NEXT: xvcvdpuxds vs6, vs6 ; CHECK-P9-NEXT: xvcvdpuxds vs5, vs5 -; CHECK-P9-NEXT: xvcvdpuxds vs3, vs3 +; CHECK-P9-NEXT: xvcvdpuxds vs4, vs4 ; CHECK-P9-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-P9-NEXT: xvcvdpuxds vs1, vs1 ; CHECK-P9-NEXT: xvcvdpuxds vs0, vs0 @@ -195,15 +195,15 @@ ; CHECK-BE-NEXT: lxv vs1, 96(r4) ; CHECK-BE-NEXT: lxv vs2, 80(r4) ; CHECK-BE-NEXT: lxv vs3, 64(r4) +; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3 ; CHECK-BE-NEXT: lxv vs4, 48(r4) -; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4 ; CHECK-BE-NEXT: lxv vs5, 32(r4) ; CHECK-BE-NEXT: lxv vs6, 16(r4) ; CHECK-BE-NEXT: lxv vs7, 0(r4) ; CHECK-BE-NEXT: xvcvdpuxds vs7, vs7 ; CHECK-BE-NEXT: xvcvdpuxds vs6, vs6 ; CHECK-BE-NEXT: xvcvdpuxds vs5, vs5 -; CHECK-BE-NEXT: xvcvdpuxds vs3, vs3 +; CHECK-BE-NEXT: xvcvdpuxds vs4, vs4 ; CHECK-BE-NEXT: xvcvdpuxds vs2, vs2 ; CHECK-BE-NEXT: xvcvdpuxds vs1, vs1 ; CHECK-BE-NEXT: xvcvdpuxds vs0, vs0 @@ -381,15 +381,15 @@ ; CHECK-P9-NEXT: lxv vs1, 96(r4) ; CHECK-P9-NEXT: lxv vs2, 80(r4) ; CHECK-P9-NEXT: lxv vs3, 64(r4) +; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3 ; CHECK-P9-NEXT: lxv vs4, 48(r4) -; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4 ; CHECK-P9-NEXT: lxv vs5, 32(r4) ; CHECK-P9-NEXT: lxv vs6, 16(r4) ; CHECK-P9-NEXT: lxv vs7, 0(r4) ; CHECK-P9-NEXT: xvcvdpsxds vs7, vs7 ; CHECK-P9-NEXT: xvcvdpsxds vs6, vs6 ; CHECK-P9-NEXT: xvcvdpsxds vs5, vs5 -; CHECK-P9-NEXT: xvcvdpsxds vs3, vs3 +; CHECK-P9-NEXT: xvcvdpsxds vs4, vs4 ; CHECK-P9-NEXT: xvcvdpsxds vs2, vs2 ; CHECK-P9-NEXT: xvcvdpsxds vs1, vs1 ; CHECK-P9-NEXT: xvcvdpsxds vs0, vs0 @@ -409,15 +409,15 @@ ; CHECK-BE-NEXT: lxv vs1, 96(r4) ; CHECK-BE-NEXT: lxv vs2, 80(r4) ; CHECK-BE-NEXT: lxv vs3, 64(r4) +; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3 ; CHECK-BE-NEXT: lxv vs4, 48(r4) -; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4 ; CHECK-BE-NEXT: lxv vs5, 32(r4) ; CHECK-BE-NEXT: lxv vs6, 16(r4) ; CHECK-BE-NEXT: lxv vs7, 0(r4) ; CHECK-BE-NEXT: xvcvdpsxds vs7, vs7 ; CHECK-BE-NEXT: xvcvdpsxds vs6, vs6 ; CHECK-BE-NEXT: xvcvdpsxds vs5, vs5 -; CHECK-BE-NEXT: xvcvdpsxds vs3, vs3 +; CHECK-BE-NEXT: xvcvdpsxds vs4, vs4 ; CHECK-BE-NEXT: xvcvdpsxds vs2, vs2 ; CHECK-BE-NEXT: xvcvdpsxds vs1, vs1 ; CHECK-BE-NEXT: xvcvdpsxds vs0, vs0 Index: llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll +++ llvm/trunk/test/CodeGen/PowerPC/vec_conv_i_to_fp_8byte_elts.ll @@ -167,15 +167,15 @@ ; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: lxv v4, 80(r4) ; CHECK-P9-NEXT: lxv v5, 64(r4) +; CHECK-P9-NEXT: xvcvuxddp vs4, v5 ; CHECK-P9-NEXT: lxv v0, 48(r4) -; CHECK-P9-NEXT: xvcvuxddp vs3, v0 ; CHECK-P9-NEXT: lxv v1, 32(r4) ; CHECK-P9-NEXT: lxv v6, 16(r4) ; CHECK-P9-NEXT: lxv v7, 0(r4) ; CHECK-P9-NEXT: xvcvuxddp vs0, v7 ; CHECK-P9-NEXT: xvcvuxddp vs1, v6 ; CHECK-P9-NEXT: xvcvuxddp vs2, v1 -; CHECK-P9-NEXT: xvcvuxddp vs4, v5 +; CHECK-P9-NEXT: xvcvuxddp vs3, v0 ; CHECK-P9-NEXT: xvcvuxddp vs5, v4 ; CHECK-P9-NEXT: xvcvuxddp vs6, v3 ; CHECK-P9-NEXT: xvcvuxddp vs7, v2 @@ -195,15 +195,15 @@ ; CHECK-BE-NEXT: lxv v3, 96(r4) ; CHECK-BE-NEXT: lxv v4, 80(r4) ; CHECK-BE-NEXT: lxv v5, 64(r4) +; CHECK-BE-NEXT: xvcvuxddp vs4, v5 ; CHECK-BE-NEXT: lxv v0, 48(r4) -; CHECK-BE-NEXT: xvcvuxddp vs3, v0 ; CHECK-BE-NEXT: lxv v1, 32(r4) ; CHECK-BE-NEXT: lxv v6, 16(r4) ; CHECK-BE-NEXT: lxv v7, 0(r4) ; CHECK-BE-NEXT: xvcvuxddp vs0, v7 ; CHECK-BE-NEXT: xvcvuxddp vs1, v6 ; CHECK-BE-NEXT: xvcvuxddp vs2, v1 -; CHECK-BE-NEXT: xvcvuxddp vs4, v5 +; CHECK-BE-NEXT: xvcvuxddp vs3, v0 ; CHECK-BE-NEXT: xvcvuxddp vs5, v4 ; CHECK-BE-NEXT: xvcvuxddp vs6, v3 ; CHECK-BE-NEXT: xvcvuxddp vs7, v2 @@ -381,15 +381,15 @@ ; CHECK-P9-NEXT: lxv v3, 96(r4) ; CHECK-P9-NEXT: lxv v4, 80(r4) ; CHECK-P9-NEXT: lxv v5, 64(r4) +; CHECK-P9-NEXT: xvcvsxddp vs4, v5 ; CHECK-P9-NEXT: lxv v0, 48(r4) -; CHECK-P9-NEXT: xvcvsxddp vs3, v0 ; CHECK-P9-NEXT: lxv v1, 32(r4) ; CHECK-P9-NEXT: lxv v6, 16(r4) ; CHECK-P9-NEXT: lxv v7, 0(r4) ; CHECK-P9-NEXT: xvcvsxddp vs0, v7 ; CHECK-P9-NEXT: xvcvsxddp vs1, v6 ; CHECK-P9-NEXT: xvcvsxddp vs2, v1 -; CHECK-P9-NEXT: xvcvsxddp vs4, v5 +; CHECK-P9-NEXT: xvcvsxddp vs3, v0 ; CHECK-P9-NEXT: xvcvsxddp vs5, v4 ; CHECK-P9-NEXT: xvcvsxddp vs6, v3 ; CHECK-P9-NEXT: xvcvsxddp vs7, v2 @@ -409,15 +409,15 @@ ; CHECK-BE-NEXT: lxv v3, 96(r4) ; CHECK-BE-NEXT: lxv v4, 80(r4) ; CHECK-BE-NEXT: lxv v5, 64(r4) +; CHECK-BE-NEXT: xvcvsxddp vs4, v5 ; CHECK-BE-NEXT: lxv v0, 48(r4) -; CHECK-BE-NEXT: xvcvsxddp vs3, v0 ; CHECK-BE-NEXT: lxv v1, 32(r4) ; CHECK-BE-NEXT: lxv v6, 16(r4) ; CHECK-BE-NEXT: lxv v7, 0(r4) ; CHECK-BE-NEXT: xvcvsxddp vs0, v7 ; CHECK-BE-NEXT: xvcvsxddp vs1, v6 ; CHECK-BE-NEXT: xvcvsxddp vs2, v1 -; CHECK-BE-NEXT: xvcvsxddp vs4, v5 +; CHECK-BE-NEXT: xvcvsxddp vs3, v0 ; CHECK-BE-NEXT: xvcvsxddp vs5, v4 ; CHECK-BE-NEXT: xvcvsxddp vs6, v3 ; CHECK-BE-NEXT: xvcvsxddp vs7, v2