Index: include/llvm/CodeGen/MachineScheduler.h =================================================================== --- include/llvm/CodeGen/MachineScheduler.h +++ include/llvm/CodeGen/MachineScheduler.h @@ -897,6 +897,28 @@ #endif }; +// Utility functions used by heuristics in tryCand(). +bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason); +bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason); +bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone); +bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF); +unsigned getWeakLeft(const SUnit *SU, bool isTop); +int biasPhysRegCopy(const SUnit *SU, bool isTop); + /// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. class GenericScheduler : public GenericSchedulerBase { @@ -963,9 +985,8 @@ const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone); + virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const; SUnit *pickNodeBidirectional(bool &IsTopNode); Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -2561,11 +2561,12 @@ } #endif +namespace llvm { /// Return true if this heuristic determines order. 
-static bool tryLess(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -2578,10 +2579,10 @@ return false; } -static bool tryGreater(int TryVal, int CandVal, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { +bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -2594,9 +2595,9 @@ return false; } -static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone) { +bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { if (Zone.isTop()) { if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), @@ -2618,6 +2619,7 @@ } return false; } +} // end namespace llvm static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { DEBUG(dbgs() << "Pick " << (IsTop ? 
"Top " : "Bot ") @@ -2772,13 +2774,14 @@ } } -static bool tryPressure(const PressureChange &TryP, - const PressureChange &CandP, - GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason, - const TargetRegisterInfo *TRI, - const MachineFunction &MF) { +namespace llvm { +bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, @@ -2811,7 +2814,7 @@ return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); } -static unsigned getWeakLeft(const SUnit *SU, bool isTop) { +unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } @@ -2822,7 +2825,7 @@ /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. We'll do this when /// regalloc has support for parallel copies. -static int biasPhysRegCopy(const SUnit *SU, bool isTop) { +int biasPhysRegCopy(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); if (!MI->isCopy()) return 0; @@ -2842,6 +2845,7 @@ return AtBoundary ? -1 : 1; return 0; } +} // end namespace llvm void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, @@ -2892,7 +2896,7 @@ // if Cand is from a different zone than TryCand. void GenericScheduler::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, - SchedBoundary *Zone) { + SchedBoundary *Zone) const { // Initialize the candidate if needed. 
if (!Cand.isValid()) { TryCand.Reason = NodeOrder; Index: lib/Target/AMDGPU/SIMachineScheduler.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineScheduler.cpp +++ lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -154,6 +154,7 @@ #endif +namespace llvm { namespace SISched { static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, @@ -187,6 +188,7 @@ Cand.setRepeat(Reason); return false; } +}} // end namespace llvm::SISched // SIScheduleBlock // @@ -212,7 +214,8 @@ } if (Cand.SGPRUsage > 60 && - tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage)) + SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, + TryCand, Cand, RegUsage)) return; // Schedule low latency instructions as top as possible. @@ -230,21 +233,22 @@ // could go quite high, thus above the arbitrary limit of 60 will encourage // use the already loaded constants (in order to release some SGPRs) before // loading more. - if (tryLess(TryCand.HasLowLatencyNonWaitedParent, - Cand.HasLowLatencyNonWaitedParent, - TryCand, Cand, SIScheduleCandReason::Depth)) + if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent, + Cand.HasLowLatencyNonWaitedParent, + TryCand, Cand, SIScheduleCandReason::Depth)) return; - if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency, - TryCand, Cand, SIScheduleCandReason::Depth)) + if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency, + TryCand, Cand, SIScheduleCandReason::Depth)) return; if (TryCand.IsLowLatency && - tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset, - TryCand, Cand, SIScheduleCandReason::Depth)) + SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset, + TryCand, Cand, SIScheduleCandReason::Depth)) return; - if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage)) + if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, + TryCand, Cand, RegUsage)) return; // Fall through to original instruction order. 
@@ -1576,19 +1580,19 @@ } // Try to hide high latencies. - if (tryLess(TryCand.LastPosHighLatParentScheduled, - Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency)) + if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled, + Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency)) return true; // Schedule high latencies early so you can hide them better. - if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency, - TryCand, Cand, Latency)) + if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency, + TryCand, Cand, Latency)) return true; - if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height, - TryCand, Cand, Depth)) + if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height, + TryCand, Cand, Depth)) return true; - if (tryGreater(TryCand.NumHighLatencySuccessors, - Cand.NumHighLatencySuccessors, - TryCand, Cand, Successor)) + if (SISched::tryGreater(TryCand.NumHighLatencySuccessors, + Cand.NumHighLatencySuccessors, + TryCand, Cand, Successor)) return true; return false; } @@ -1600,17 +1604,17 @@ return true; } - if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0, - TryCand, Cand, RegUsage)) + if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0, + TryCand, Cand, RegUsage)) return true; - if (tryGreater(TryCand.NumSuccessors > 0, - Cand.NumSuccessors > 0, - TryCand, Cand, Successor)) + if (SISched::tryGreater(TryCand.NumSuccessors > 0, + Cand.NumSuccessors > 0, + TryCand, Cand, Successor)) return true; - if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth)) + if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth)) return true; - if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff, - TryCand, Cand, RegUsage)) + if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff, + TryCand, Cand, RegUsage)) return true; return false; } Index: lib/Target/SystemZ/SystemZMachineScheduler.h 
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.h
+++ lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -142,6 +142,23 @@
   void releaseBottomNode(SUnit *SU) override {};
 };
 
+/// Pre-RA scheduling strategy for SystemZ: GenericScheduler's candidate
+/// comparison plus a latency boost for instructions using the vector unit.
+class SystemZPreRASchedStrategy : public GenericScheduler {
+  const SystemZSubtarget *ST;
+
+  // The VectorUnit index is 6 for both z13 and z14.
+  const unsigned SystemZVectorUnitIdx = 6;
+
+public:
+  SystemZPreRASchedStrategy(const MachineSchedContext *C) :
+    GenericScheduler(C), ST(&C->MF->getSubtarget<SystemZSubtarget>()) {}
+
+  void tryCandidate(SchedCandidate &Cand,
+                    SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -256,3 +256,141 @@
   // Put all released SUs in the Available set.
   Available.insert(SU);
 }
+
+
+//////////// Pre-RA scheduling
+
+/// Decide whether TryCand is a better pick than Cand; the helper heuristics
+/// record the deciding reason in TryCand.Reason. This is mostly copied from
+/// GenericScheduler::tryCandidate() in MachineScheduler.cpp, with one SystemZ
+/// specific addition: a latency heuristic for vector unit instructions.
+///
+/// \param Cand    Best candidate found so far.
+/// \param TryCand Candidate being compared against Cand.
+/// \param Zone    Boundary both candidates belong to, or nullptr when they
+///                come from different boundaries.
+void SystemZPreRASchedStrategy::
+tryCandidate(SchedCandidate &Cand,
+             SchedCandidate &TryCand,
+             SchedBoundary *Zone) const {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
+  // Avoid exceeding the target's limit.
+  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
+                                               Cand.RPDelta.Excess,
+                                               TryCand, Cand, RegExcess, TRI,
+                                               DAG->MF))
+    return;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+                                               Cand.RPDelta.CriticalMax,
+                                               TryCand, Cand, RegCritical, TRI,
+                                               DAG->MF))
+    return;
+
+  // We only compare a subset of features when comparing nodes between
+  // Top and Bottom boundary. Some properties are simply incomparable, in many
+  // other instances we should only override the other boundary if something
+  // is a clear good pick on one boundary. Skip heuristics that are more
+  // "tie-breaking" in nature.
+  bool SameBoundary = Zone != nullptr;
+  if (SameBoundary) {
+    // For loops that are acyclic path limited, aggressively schedule for
+    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
+    // heuristics to take precedence.
+    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+        tryLatency(TryCand, Cand, *Zone))
+      return;
+
+    // Prioritize instructions that read unbuffered resources by stall cycles.
+    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+      return;
+  }
+
+  // Keep clustered nodes together to encourage downstream peephole
+  // optimizations which may reduce resource requirements.
+  //
+  // This is a best effort to set things up for a post-RA pass. Optimizations
+  // like generating loads of multiple registers should ideally be done within
+  // the scheduler pass by combining the loads during DAG postprocessing.
+  const SUnit *CandNextClusterSU =
+    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  const SUnit *TryCandNextClusterSU =
+    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+                 Cand.SU == CandNextClusterSU,
+                 TryCand, Cand, Cluster))
+    return;
+
+  if (SameBoundary) {
+    // Weak edges are for clustering and other constraints.
+    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                getWeakLeft(Cand.SU, Cand.AtTop),
+                TryCand, Cand, Weak))
+      return;
+  }
+
+  // Avoid increasing the max pressure of the entire region.
+  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
+                                               Cand.RPDelta.CurrentMax,
+                                               TryCand, Cand, RegMax, TRI,
+                                               DAG->MF))
+    return;
+
+  // SystemZ specific: Latency boost for instructions using the vector unit.
+  // tryLatency() dereferences Zone, so this only applies when both candidates
+  // are from the same boundary (Zone is null otherwise).
+  if (SameBoundary && ST->hasVector()) {
+    assert((std::string(
+                SchedModel->getProcResource(SystemZVectorUnitIdx)->Name)
+                .find("VecUnit") != std::string::npos) &&
+           "Hard coded index for vector unit changed!");
+    bool VectorPipeline = false;
+    const MCSchedClassDesc *SC = DAG->getSchedClass(TryCand.SU);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      if (PI->ProcResourceIdx == SystemZVectorUnitIdx) {
+        VectorPipeline = true;
+        break;
+      }
+    }
+    if (VectorPipeline && tryLatency(TryCand, Cand, *Zone))
+      return;
+  }
+
+  if (SameBoundary) {
+    // Avoid critical resource consumption and balance the schedule.
+    TryCand.initResourceDelta(DAG, SchedModel);
+    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+                TryCand, Cand, ResourceReduce))
+      return;
+    if (tryGreater(TryCand.ResDelta.DemandedResources,
+                   Cand.ResDelta.DemandedResources,
+                   TryCand, Cand, ResourceDemand))
+      return;
+
+    // Avoid serializing long latency dependence chains.
+    // For acyclic path limited loops, latency was already checked above.
+    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+      return;
+
+    // Fall through to original instruction order.
+    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+      TryCand.Reason = NodeOrder;
+    }
+  }
+}
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -167,6 +167,17 @@
   }
 
   ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    // To run the generic pre-RA scheduler use: -misched=converge
+    ScheduleDAGMILive *DAG =
+      new ScheduleDAGMILive(C, llvm::make_unique<SystemZPreRASchedStrategy>(C));
+
+    // Use same DAG mutators as are applied in createGenericSchedLive().
+    DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+    return DAG;
+  }
+
+  ScheduleDAGInstrs *
   createPostMachineScheduler(MachineSchedContext *C) const override {
     return new ScheduleDAGMI(C,
                              llvm::make_unique<SystemZPostRASchedStrategy>(C),
Index: test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
===================================================================
--- test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
+++ test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
@@ -64,7 +64,7 @@
 ; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27
 ; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15)
 ; CHECK-DAG: vl [[REG6:%v[0-9]+]], 160(%r15)
-; CHECK-DAG: vo [[REG7:%v[0-9]+]], %v2, [[REG4]]
+; CHECK-DAG: vo [[REG7:%v[0-9]+]], [[REG1]], [[REG4]]
 ; CHECK-DAG: vo [[REG8:%v[0-9]+]], [[REG2]], [[REG3]]
 ; CHECK-DAG: vsel %v24, %v29, [[REG6]], [[REG8]]
 ; CHECK-DAG: vsel %v26, %v31, [[REG5]], [[REG7]]
@@ -439,9 +439,9 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vceqg %v0, %v26, %v30
 ; CHECK-NEXT: vceqg %v1, %v24, %v28
-; CHECK-NEXT: vpkg %v0, %v1, %v0
-; CHECK-NEXT: vceqf %v1, %v25, %v27
-; CHECK-NEXT: vx %v0, %v0, %v1
+; CHECK-DAG: vpkg %v0, %v1, %v0
+; CHECK-DAG: vceqf [[REG0:%v[0-9]+]], %v25, %v27
+; CHECK-NEXT: vx %v0, %v0, [[REG0]]
 ; CHECK-NEXT: vsel %v24, %v29, %v31, %v0
 ; CHECK-NEXT: br %r14
   %cmp0 = icmp eq
<4 x i64> %val1, %val2 @@ -479,18 +479,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-NEXT: vpkg %v1, %v1, %v1 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v28, %v30 +; CHECK-DAG: vpkg [[REG2]], [[REG2]], [[REG2]] +; CHECK-NEXT: vo %v0, %v0, [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; @@ -514,26 +514,26 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vuphf %v0, %v0 -; CHECK-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vuphf %v0, %v0 +; CHECK-DAG: vfchdb 
[[REG2:%v[0-9]+]], %v28, %v30 +; CHECK-NEXT: vo %v0, %v0, [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun26: ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26 -; CHECK-Z14-NEXT: vuphf %v0, %v0 -; CHECK-Z14-NEXT: vfchdb %v1, %v28, %v30 +; CHECK-Z14-DAG: vuphf %v0, %v0 +; CHECK-Z14-DAG: vfchdb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vo %v0, %v0, %v1 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-Z14-NEXT: br %r14 @@ -585,8 +585,8 @@ ; CHECK-DAG: vmrhf [[REG17:%v[0-9]+]], %v30, %v30 ; CHECK-DAG: vldeb [[REG19:%v[0-9]+]], [[REG17]] ; CHECK-DAG: vldeb [[REG20:%v[0-9]+]], [[REG8]] -; CHECK-NEXT: vfchdb %v2, [[REG20]], [[REG19]] -; CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], %v2, [[REG16]] +; CHECK-NEXT: vfchdb [[REG22:%v[0-9]+]], [[REG20]], [[REG19]] +; CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], [[REG22]], [[REG16]] ; CHECK-NEXT: vx %v0, [[REG11]], [[REG21]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 @@ -610,30 +610,30 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vmrhf %v3, %v28, %v28 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v30, %v30 -; CHECK-NEXT: vmrlf %v2, %v28, %v28 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v30, %v30 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; 
CHECK-DAG: vmrhf [[REG2:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb [[REG3:%v[0-9]+]], [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, [[REG3]], %v0 +; CHECK-DAG: vmrlf %v1, %v30, %v30 +; CHECK-DAG: vmrlf [[REG4:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vldeb [[REG4]], [[REG4]] +; CHECK-DAG: vfchdb %v1, [[REG4]], %v1 +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v30, %v30 +; CHECK-NEXT: vldeb [[REG5]], [[REG5]] +; CHECK-NEXT: vldeb [[REG2]], [[REG2]] +; CHECK-NEXT: vfchdb [[REG6:%v[0-9]+]], [[REG2]], [[REG5]] +; CHECK-NEXT: vpkg %v1, [[REG6]], %v1 ; CHECK-NEXT: vx %v0, %v0, %v1 ; CHECK-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-NEXT: vuphf %v1, %v1 -; CHECK-NEXT: vuphf %v0, %v0 +; CHECK-DAG: vuphf %v1, %v1 +; CHECK-DAG: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-NEXT: vsel %v26, %v27, %v31, %v1 ; CHECK-NEXT: br %r14 @@ -644,8 +644,8 @@ ; CHECK-Z14-NEXT: vfchsb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vx %v0, %v0, %v1 ; CHECK-Z14-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-Z14-NEXT: vuphf %v1, %v1 -; CHECK-Z14-NEXT: vuphf %v0, %v0 +; CHECK-Z14-DAG: vuphf %v1, %v1 +; CHECK-Z14-DAG: vuphf %v0, %v0 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-Z14-NEXT: vsel %v26, %v27, %v31, %v1 ; CHECK-Z14-NEXT: br %r14 @@ -659,70 +659,70 @@ define <8 x float> @fun30(<8 x float> %val1, <8 x float> %val2, <8 x double> %val3, <8 x double> %val4, <8 x float> %val5, <8 x float> %val6) { ; CHECK-LABEL: fun30: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v16, %v28, %v28 -; CHECK-NEXT: vmrlf %v17, %v24, %v24 -; CHECK-NEXT: vldeb %v16, %v16 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vfchdb %v16, %v17, %v16 -; CHECK-NEXT: vmrhf %v17, %v28, %v28 -; CHECK-NEXT: vmrhf %v18, %v24, %v24 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vl %v4, 192(%r15) -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vl %v5, 208(%r15) -; CHECK-NEXT: vl %v6, 160(%r15) -; CHECK-NEXT: vl %v7, 176(%r15) -; CHECK-NEXT: vl %v0, 272(%r15) -; CHECK-NEXT: vl %v1, 240(%r15) 
-; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vl %v2, 256(%r15) -; CHECK-NEXT: vl %v3, 224(%r15) -; CHECK-NEXT: vpkg %v16, %v17, %v16 -; CHECK-NEXT: vmrlf %v17, %v30, %v30 -; CHECK-NEXT: vmrlf %v18, %v26, %v26 -; CHECK-NEXT: vmrhf %v19, %v26, %v26 -; CHECK-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-NEXT: vpkg %v6, %v6, %v7 -; CHECK-NEXT: vpkg %v4, %v4, %v5 -; CHECK-NEXT: vn %v5, %v16, %v6 -; CHECK-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vmrhf %v18, %v30, %v30 -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vldeb %v19, %v19 -; CHECK-NEXT: vfchdb %v18, %v19, %v18 -; CHECK-NEXT: vpkg %v17, %v18, %v17 -; CHECK-NEXT: vn %v4, %v17, %v4 -; CHECK-NEXT: vsel %v26, %v1, %v0, %v4 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], [[REG1]], [[REG0]] +; CHECK-DAG: vmrhf %v17, %v28, %v28 +; CHECK-DAG: vmrhf %v18, %v24, %v24 +; CHECK-DAG: vldeb %v17, %v17 +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 192(%r15) +; CHECK-DAG: vldeb %v18, %v18 +; CHECK-DAG: vl [[REG4:%v[0-9]+]], 208(%r15) +; CHECK-DAG: vl [[REG5:%v[0-9]+]], 160(%r15) +; CHECK-DAG: vl [[REG6:%v[0-9]+]], 176(%r15) +; CHECK-DAG: vl [[REG7:%v[0-9]+]], 272(%r15) +; CHECK-DAG: vl [[REG8:%v[0-9]+]], 240(%r15) +; CHECK-DAG: vfchdb [[REG9:%v[0-9]+]], %v18, %v17 +; CHECK-DAG: vl [[REG10:%v[0-9]+]], 256(%r15) +; CHECK-DAG: vl [[REG11:%v[0-9]+]], 224(%r15) +; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG9]], [[REG2]] +; CHECK-DAG: vmrlf [[REG13:%v[0-9]+]], %v30, %v30 +; CHECK-DAG: vmrlf [[REG14:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG15:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v27, [[REG6]] +; CHECK-DAG: vfchdb 
[[REG17:%v[0-9]+]], %v25, [[REG5]] +; CHECK-DAG: vfchdb [[REG18:%v[0-9]+]], %v31, [[REG4]] +; CHECK-DAG: vfchdb [[REG19:%v[0-9]+]], %v29, [[REG3]] +; CHECK-DAG: vpkg [[REG20:%v[0-9]+]], [[REG17]], [[REG16]] +; CHECK-DAG: vpkg [[REG21:%v[0-9]+]], [[REG19]], [[REG18]] +; CHECK-DAG: vn [[REG22:%v[0-9]+]], [[REG12]], [[REG20]] +; CHECK-DAG: vsel %v24, [[REG11]], [[REG10]], [[REG22]] +; CHECK-DAG: vldeb [[REG13]], [[REG13]] +; CHECK-DAG: vldeb [[REG14]], [[REG14]] +; CHECK-DAG: vfchdb [[REG23:%v[0-9]+]], [[REG14]], [[REG13]] +; CHECK-DAG: vmrhf [[REG24:%v[0-9]+]], %v30, %v30 +; CHECK-DAG: vldeb [[REG24]], [[REG24]] +; CHECK-DAG: vldeb [[REG15]], [[REG15]] +; CHECK-DAG: vfchdb [[REG25:%v[0-9]+]], [[REG15]], [[REG24]] +; CHECK-DAG: vpkg [[REG26:%v[0-9]+]], [[REG25]], [[REG23]] +; CHECK-DAG: vn [[REG27:%v[0-9]+]], [[REG26]], [[REG21]] +; CHECK-DAG: vsel %v26, [[REG8]], [[REG7]], [[REG27]] ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun30: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vl %v4, 192(%r15) -; CHECK-Z14-NEXT: vl %v5, 208(%r15) -; CHECK-Z14-NEXT: vl %v6, 160(%r15) -; CHECK-Z14-NEXT: vl %v7, 176(%r15) -; CHECK-Z14-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-Z14-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-Z14-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-Z14-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-Z14-NEXT: vfchsb %v16, %v24, %v28 -; CHECK-Z14-NEXT: vfchsb %v17, %v26, %v30 -; CHECK-Z14-NEXT: vpkg %v6, %v6, %v7 -; CHECK-Z14-NEXT: vpkg %v4, %v4, %v5 +; CHECK-Z14-NEXT: vl [[REG0:%v[0-9]+]], 192(%r15) +; CHECK-Z14-NEXT: vl [[REG1:%v[0-9]+]], 208(%r15) +; CHECK-Z14-NEXT: vl [[REG2:%v[0-9]+]], 160(%r15) +; CHECK-Z14-NEXT: vl [[REG3:%v[0-9]+]], 176(%r15) +; CHECK-Z14-NEXT: vfchdb [[REG4:%v[0-9]+]], %v27, [[REG3]] +; CHECK-Z14-NEXT: vfchdb [[REG5:%v[0-9]+]], %v25, [[REG2]] +; CHECK-Z14-NEXT: vfchdb [[REG6:%v[0-9]+]], %v31, [[REG1]] +; CHECK-Z14-NEXT: vfchdb [[REG7:%v[0-9]+]], %v29, [[REG0]] +; CHECK-Z14-NEXT: vfchsb [[REG8:%v[0-9]+]], %v24, %v28 +; CHECK-Z14-NEXT: vfchsb [[REG9:%v[0-9]+]], %v26, %v30 +; 
CHECK-Z14-NEXT: vpkg [[REG10:%v[0-9]+]], [[REG5]], [[REG4]] +; CHECK-Z14-NEXT: vpkg [[REG11:%v[0-9]+]], [[REG7]], [[REG6]] ; CHECK-Z14-NEXT: vl %v0, 272(%r15) ; CHECK-Z14-NEXT: vl %v1, 240(%r15) ; CHECK-Z14-NEXT: vl %v2, 256(%r15) -; CHECK-Z14-NEXT: vl %v3, 224(%r15) -; CHECK-Z14-NEXT: vn %v4, %v17, %v4 -; CHECK-Z14-NEXT: vn %v5, %v16, %v6 -; CHECK-Z14-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v26, %v1, %v0, %v4 +; CHECK-Z14-NEXT: vl [[REG14:%v[0-9]+]], 224(%r15) +; CHECK-Z14-NEXT: vn [[REG12:%v[0-9]+]], [[REG9]], [[REG11]] +; CHECK-Z14-NEXT: vn [[REG13:%v[0-9]+]], [[REG8]], [[REG10]] +; CHECK-Z14-NEXT: vsel %v24, [[REG14]], %v2, [[REG13]] +; CHECK-Z14-NEXT: vsel %v26, %v1, %v0, [[REG12]] ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <8 x float> %val1, %val2 %cmp1 = fcmp ogt <8 x double> %val3, %val4 @@ -765,20 +765,20 @@ define <4 x float> @fun33(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x float> %val5, <4 x float> %val6) { ; CHECK-LABEL: fun33: ; CHECK: # %bb.0: -; CHECK-NEXT: vfchdb %v0, %v26, %v30 -; CHECK-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v27, %v27 -; CHECK-NEXT: vmrlf %v2, %v25, %v25 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v27, %v27 -; CHECK-NEXT: vmrhf %v3, %v25, %v25 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-DAG: vfchdb %v0, %v26, %v30 +; CHECK-DAG: vfchdb %v1, %v24, %v28 +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG2:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vldeb [[REG3:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb 
[[REG7:%v[0-9]+]], [[REG5]] +; CHECK-DAG: vldeb [[REG8:%v[0-9]+]], [[REG6]] +; CHECK-DAG: vfchdb %v2, [[REG8]], [[REG7]] +; CHECK-NEXT: vpkg %v1, %v2, [[REG4]] ; CHECK-NEXT: vn %v0, %v0, %v1 ; CHECK-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-NEXT: br %r14 @@ -787,9 +787,9 @@ ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchdb %v0, %v26, %v30 ; CHECK-Z14-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-Z14-NEXT: vpkg %v0, %v1, %v0 -; CHECK-Z14-NEXT: vfchsb %v1, %v25, %v27 -; CHECK-Z14-NEXT: vn %v0, %v0, %v1 +; CHECK-Z14-DAG: vpkg %v0, %v1, %v0 +; CHECK-Z14-DAG: vfchsb [[REG0:%v[0-9]+]], %v25, %v27 +; CHECK-Z14-NEXT: vn %v0, %v0, [[REG0]] ; CHECK-Z14-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <4 x double> %val1, %val2 @@ -802,13 +802,13 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x double> %val5, <4 x double> %val6) { ; CHECK-LABEL: fun34: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 -; CHECK-NEXT: vldeb [[REG2:%v[0-9]+]], [[REG0]] -; CHECK-NEXT: vldeb [[REG3:%v[0-9]+]], [[REG1]] -; CHECK-NEXT: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] -; CHECK-NEXT: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG2:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vldeb [[REG3:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 ; CHECK-DAG: vldeb [[REG7:%v[0-9]+]], [[REG5]] ; CHECK-DAG: vl [[REG8:%v[0-9]+]], 176(%r15) ; CHECK-DAG: vldeb [[REG9:%v[0-9]+]], [[REG6]] @@ -820,26 +820,26 @@ ; CHECK-NEXT: vfchdb [[REG15:%v[0-9]+]], %v24, %v28 ; CHECK-NEXT: vfchdb [[REG16:%v[0-9]+]], %v26, %v30 ; CHECK-NEXT: vuphf [[REG17:%v[0-9]+]], [[REG14]] -; CHECK-NEXT: vn 
[[REG18:%v[0-9]+]], [[REG16]], [[REG17]] -; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] +; CHECK-DAG: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]] +; CHECK-DAG: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] ; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]] ; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]] ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun34: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27 -; CHECK-Z14-NEXT: vuphf %v5, %v4 -; CHECK-Z14-NEXT: vmrlg %v4, %v4, %v4 -; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28 -; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30 -; CHECK-Z14-NEXT: vuphf %v4, %v4 +; CHECK-Z14-NEXT: vfchsb [[REG0:%v[0-9]+]], %v25, %v27 +; CHECK-Z14-NEXT: vuphf [[REG1:%v[0-9]+]], [[REG0]] +; CHECK-Z14-NEXT: vmrlg [[REG0]], [[REG0]], [[REG0]] +; CHECK-Z14-NEXT: vfchdb [[REG2:%v[0-9]+]], %v24, %v28 +; CHECK-Z14-NEXT: vfchdb [[REG3:%v[0-9]+]], %v26, %v30 +; CHECK-Z14-NEXT: vuphf [[REG0]], [[REG0]] ; CHECK-Z14-NEXT: vl %v0, 176(%r15) -; CHECK-Z14-NEXT: vl %v1, 160(%r15) -; CHECK-Z14-NEXT: vn %v3, %v3, %v4 -; CHECK-Z14-NEXT: vn %v2, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2 -; CHECK-Z14-NEXT: vsel %v26, %v31, %v0, %v3 +; CHECK-Z14-NEXT: vl [[REG4:%v[0-9]+]], 160(%r15) +; CHECK-Z14-DAG: vn [[REG5:%v[0-9]+]], [[REG3]], [[REG0]] +; CHECK-Z14-DAG: vn [[REG6:%v[0-9]+]], [[REG2]], [[REG1]] +; CHECK-Z14-NEXT: vsel %v24, %v29, [[REG4]], [[REG6]] +; CHECK-Z14-NEXT: vsel %v26, %v31, %v0, [[REG5]] ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <4 x double> %val1, %val2 %cmp1 = fcmp ogt <4 x float> %val3, %val4 Index: test/CodeGen/SystemZ/vec-cmpsel.ll =================================================================== --- test/CodeGen/SystemZ/vec-cmpsel.ll +++ test/CodeGen/SystemZ/vec-cmpsel.ll @@ -316,17 +316,17 @@ define <2 x float> @fun25(<2 x float> %val1, <2 x float> %val2, <2 x float> %val3, <2 x float> %val4) { ; CHECK-LABEL: fun25: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v0, %v26, %v26 -; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; 
CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-DAG: vmrlf %v0, %v26, %v26 +; CHECK-DAG: vmrlf %v1, %v24, %v24 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb [[REG0:%v[0-9]+]], %v1, %v0 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG2:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vldeb [[REG2]], [[REG2]] +; CHECK-DAG: vfchdb [[REG3:%v[0-9]+]], [[REG2]], [[REG1]] +; CHECK-NEXT: vpkg %v0, [[REG3]], [[REG0]] ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -344,16 +344,16 @@ define <2 x double> @fun26(<2 x float> %val1, <2 x float> %val2, <2 x double> %val3, <2 x double> %val4) { ; CHECK-LABEL: fun26: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v0, %v26, %v26 -; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-DAG: vmrlf %v0, %v26, %v26 +; CHECK-DAG: vmrlf %v1, %v24, %v24 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 @@ -391,14 +391,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; 
CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -419,15 +419,15 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg [[REG0:%v[0-9]+]], %v1, %v0 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg [[REG0:%v[0-9]+]], %v1, %v0 ; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]] ; CHECK-DAG: vuphf [[REG1]], [[REG1]] ; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG0]] Index: test/CodeGen/SystemZ/vec-ctpop-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-ctpop-01.ll +++ test/CodeGen/SystemZ/vec-ctpop-01.ll @@ -30,8 +30,8 @@ define <4 x i32> @f3(<4 x i32> %a) { ; CHECK-LABEL: f3: -; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -; CHECK: vgbm [[T2:%v[0-9]+]], 0 +; CHECK-DAG: vpopct [[T1:%v[0-9]+]], %v24, 0 +; CHECK-DAG: vgbm [[T2:%v[0-9]+]], 0 ; CHECK: vsumb %v24, 
[[T1]], [[T2]] ; CHECK: br %r14 @@ -41,8 +41,8 @@ define <2 x i64> @f4(<2 x i64> %a) { ; CHECK-LABEL: f4: -; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -; CHECK: vgbm [[T2:%v[0-9]+]], 0 +; CHECK-DAG: vpopct [[T1:%v[0-9]+]], %v24, 0 +; CHECK-DAG: vgbm [[T2:%v[0-9]+]], 0 ; CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]] ; CHECK: vsumgf %v24, [[T3]], [[T2]] ; CHECK: br %r14