MachineScheduler: cache the last candidate picked from each boundary.

GenericScheduler keeps TopCand/BotCand as members. During bidirectional
picking a cached candidate is reused when it is still valid, its SU has not
been scheduled, and the freshly computed policy equals the cached one;
otherwise it is reset and re-picked from the queue. releaseTopNode(),
releaseBottomNode() and the first MachineScheduler.cpp hunk clear the cached
SU. With VerifyScheduling set in asserts builds, the reused candidate is
checked against a fresh pick.

Index: include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- include/llvm/CodeGen/MachineScheduler.h
+++ include/llvm/CodeGen/MachineScheduler.h
@@ -775,6 +775,16 @@
     unsigned DemandResIdx;
 
     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
+
+    /// Two policies are equal iff all three fields match.
+    bool operator==(const CandPolicy &RHS) const {
+      return ReduceLatency == RHS.ReduceLatency &&
+             ReduceResIdx == RHS.ReduceResIdx &&
+             DemandResIdx == RHS.DemandResIdx;
+    }
+    bool operator!=(const CandPolicy &RHS) const {
+      return !(*this == RHS);
+    }
   };
 
   /// Status of an instruction's critical resource consumption.
@@ -816,8 +826,18 @@
     // Critical resource consumption of the best candidate.
     SchedResourceDelta ResDelta;
 
-    SchedCandidate(const CandPolicy &policy)
-      : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
+    SchedCandidate() { reset(CandPolicy()); }
+    SchedCandidate(const CandPolicy &Policy) { reset(Policy); }
+
+    /// Install \p NewPolicy and clear the picked SU, reason and deltas.
+    void reset(const CandPolicy &NewPolicy) {
+      Policy = NewPolicy;
+      SU = nullptr;
+      Reason = NoCand;
+      RepeatReasonSet = 0;
+      RPDelta = RegPressureDelta();
+      ResDelta = SchedResourceDelta();
+    }
 
     bool isValid() const { return SU; }
 
@@ -864,6 +884,11 @@
   SchedBoundary Top;
   SchedBoundary Bot;
 
+  /// Candidate last picked from Top boundary.
+  SchedCandidate TopCand;
+  /// Candidate last picked from Bot boundary.
+  SchedCandidate BotCand;
+
   MachineSchedPolicy RegionPolicy;
 public:
   GenericScheduler(const MachineSchedContext *C):
@@ -892,10 +917,12 @@
 
   void releaseTopNode(SUnit *SU) override {
     Top.releaseTopNode(SU);
+    TopCand.SU = nullptr;
   }
 
   void releaseBottomNode(SUnit *SU) override {
     Bot.releaseBottomNode(SU);
+    BotCand.SU = nullptr;
   }
 
   void registerRoots() override;
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -2555,6 +2555,8 @@
         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
             Itin, DAG);
   }
+  TopCand.SU = nullptr;
+  BotCand.SU = nullptr;
 }
 
 /// Initialize the per-region scheduling policy.
@@ -2920,19 +2922,35 @@
     DEBUG(dbgs() << "Pick Top ONLY1\n");
     return SU;
   }
-  CandPolicy NoPolicy;
-  SchedCandidate BotCand(NoPolicy);
-  SchedCandidate TopCand(NoPolicy);
   // Set the bottom-up policy based on the state of the current bottom zone and
   // the instructions outside the zone, including the top zone.
-  setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+  CandPolicy BotPolicy;
+  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
   // Set the top-down policy based on the state of the current top zone and
   // the instructions outside the zone, including the bottom zone.
-  setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
+  CandPolicy TopPolicy;
+  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
 
   // Prefer bottom scheduling when heuristics are silent.
-  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
-  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+
+  // See if BotCand is still valid (because we previously scheduled from Top).
+  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+      BotCand.Policy != BotPolicy) {
+    BotCand.reset(BotPolicy);
+    pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+  } else {
+    DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+    if (VerifyScheduling) {
+      SchedCandidate TCand;
+      TCand.reset(BotPolicy);
+      pickNodeFromQueue(Bot, DAG->getBotRPTracker(), TCand);
+      assert(TCand.SU == BotCand.SU &&
+             "Last pick result should correspond to re-picking right now");
+    }
+#endif
+  }
 
   // If either Q has a single candidate that provides the least increase in
   // Excess pressure, we can immediately schedule from that Q.
@@ -2948,9 +2966,25 @@
     tracePick(BotCand, IsTopNode);
     return BotCand.SU;
   }
+
   // Check if the top Q has a better candidate.
-  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
-  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+      TopCand.Policy != TopPolicy) {
+    TopCand.reset(TopPolicy);
+    pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+  } else {
+    DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+    if (VerifyScheduling) {
+      SchedCandidate TCand;
+      TCand.reset(TopPolicy);
+      pickNodeFromQueue(Top, DAG->getTopRPTracker(), TCand);
+      assert(TCand.SU == TopCand.SU &&
+             "Last pick result should correspond to re-picking right now");
+    }
+#endif
+  }
 
   // Choose the queue with the most important (lowest enum) reason.
   if (TopCand.Reason < BotCand.Reason) {
@@ -2977,7 +3011,7 @@
       SU = Top.pickOnlyChoice();
       if (!SU) {
         CandPolicy NoPolicy;
-        SchedCandidate TopCand(NoPolicy);
+        TopCand.reset(NoPolicy);
         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
         assert(TopCand.Reason != NoCand && "failed to find a candidate");
         tracePick(TopCand, true);
@@ -2988,7 +3022,7 @@
       SU = Bot.pickOnlyChoice();
       if (!SU) {
         CandPolicy NoPolicy;
-        SchedCandidate BotCand(NoPolicy);
+        BotCand.reset(NoPolicy);
         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
         assert(BotCand.Reason != NoCand && "failed to find a candidate");
         tracePick(BotCand, false);
Index: test/CodeGen/AArch64/vector-fcopysign.ll
===================================================================
--- test/CodeGen/AArch64/vector-fcopysign.ll
+++ test/CodeGen/AArch64/vector-fcopysign.ll
@@ -94,21 +94,21 @@
 define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
 ; CHECK-LABEL: test_copysign_v4f32_v4f64:
 ; CHECK-NEXT: mov s3, v0[1]
-; CHECK-NEXT: mov d4, v1[1]
-; CHECK-NEXT: movi.4s v5, #0x80, lsl #24
-; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
+; CHECK-NEXT: fcvt s5, d1
 ; CHECK-NEXT: mov s6, v0[2]
 ; CHECK-NEXT: mov s7, v0[3]
-; CHECK-NEXT: fcvt s16, d2
-; CHECK-NEXT: bit.16b v0, v1, v5
-; CHECK-NEXT: bit.16b v6, v16, v5
-; CHECK-NEXT: fcvt s1, d4
-; CHECK-NEXT: bit.16b v3, v1, v5
+; CHECK-NEXT: bit.16b v0, v5, v4
+; CHECK-NEXT: fcvt s5, d2
+; CHECK-NEXT: bit.16b v6, v5, v4
+; CHECK-NEXT: mov d1, v1[1]
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: bit.16b v3, v1, v4
 ; CHECK-NEXT: mov d1, v2[1]
 ; CHECK-NEXT: fcvt s1, d1
 ; CHECK-NEXT: ins.s v0[1], v3[0]
 ; CHECK-NEXT: ins.s v0[2], v6[0]
-; CHECK-NEXT: bit.16b v7, v1, v5
+; CHECK-NEXT: bit.16b v7, v1, v4
 ; CHECK-NEXT: ins.s v0[3], v7[0]
 ; CHECK-NEXT: ret
   %tmp0 = fptrunc <4 x double> %b to <4 x float>