Index: include/llvm/CodeGen/MachineScheduler.h =================================================================== --- include/llvm/CodeGen/MachineScheduler.h +++ include/llvm/CodeGen/MachineScheduler.h @@ -892,6 +892,9 @@ void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone); + bool tryLatency(SchedCandidate &TryCand, SchedCandidate &Cand, + SchedBoundary &Zone); + #ifndef NDEBUG void traceCandidate(const SchedCandidate &Cand); #endif @@ -963,9 +966,20 @@ const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker); - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone); + bool tryCandidate_RegPress(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); + bool tryCandidate_Latency(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); + bool tryCandidate_Clustered_Weak(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); + bool tryCandidate_RegPress2(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); + bool tryCandidate_Resources(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); + bool tryCandidate_Latency2_Order(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); + virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone); SUnit *pickNodeBidirectional(bool &IsTopNode); Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -2598,9 +2598,9 @@ return false; } -static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, - GenericSchedulerBase::SchedCandidate &Cand, - SchedBoundary &Zone) { +bool GenericSchedulerBase:: +tryLatency(SchedCandidate &TryCand, SchedCandidate &Cand, + SchedBoundary &Zone) { if (Zone.isTop()) { if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { if 
(tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), @@ -2884,44 +2884,40 @@ << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n"); } -/// Apply a set of heursitics to a new candidate. Heuristics are currently -/// hierarchical. This may be more efficient than a graduated cost model because -/// we don't need to evaluate all aspects of the model for each node in the -/// queue. But it's really done to make the heuristics easier to debug and -/// statistically analyze. -/// -/// \param Cand provides the policy and current best candidate. -/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. -/// \param Zone describes the scheduled zone that we are extending, or nullptr -// if Cand is from a different zone than TryCand. -void GenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary *Zone) { +bool GenericScheduler::tryCandidate_RegPress(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) { // Initialize the candidate if needed. if (!Cand.isValid()) { TryCand.Reason = NodeOrder; - return; + return true; } if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop), biasPhysRegCopy(Cand.SU, Cand.AtTop), TryCand, Cand, PhysRegCopy)) - return; + return true; // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess, TRI, DAG->MF)) - return; + return true; // Avoid increasing the max critical pressure in the scheduled region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, TryCand, Cand, RegCritical, TRI, DAG->MF)) - return; + return true; + return false; +} + +bool GenericScheduler::tryCandidate_Latency(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) { // We only compare a subset of features when comparing nodes between // Top and Bottom boundary. 
Some properties are simply incomparable, in many // other instances we should only override the other boundary if something @@ -2934,14 +2930,20 @@ // heuristics to take precedence. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && tryLatency(TryCand, Cand, *Zone)) - return; + return true; // Prioritize instructions that read unbuffered resources by stall cycles. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) - return; + return true; } + return false; +} + +bool GenericScheduler::tryCandidate_Clustered_Weak(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) { // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. // @@ -2955,46 +2957,105 @@ if (tryGreater(TryCand.SU == TryCandNextClusterSU, Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) - return; + return true; + bool SameBoundary = Zone != nullptr; if (SameBoundary) { // Weak edges are for clustering and other constraints. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) - return; + return true; } + return false; +} + +bool GenericScheduler::tryCandidate_RegPress2(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) { // Avoid increasing the max pressure of the entire region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, Cand, RegMax, TRI, DAG->MF)) - return; + return true; + + return false; +} +bool GenericScheduler::tryCandidate_Resources(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) { + bool SameBoundary = Zone != nullptr; if (SameBoundary) { // Avoid critical resource consumption and balance the schedule. 
TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) - return; + return true; if (tryGreater(TryCand.ResDelta.DemandedResources, Cand.ResDelta.DemandedResources, TryCand, Cand, ResourceDemand)) - return; + return true; + } + return false; +} + +bool GenericScheduler::tryCandidate_Latency2_Order(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) { + bool SameBoundary = Zone != nullptr; + if (SameBoundary) { // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) - return; + return true; // Fall through to original instruction order. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { TryCand.Reason = NodeOrder; + return true; } } + + return false; +} + +/// Apply a set of heuristics to a new candidate. Heuristics are currently +/// hierarchical. This may be more efficient than a graduated cost model because +/// we don't need to evaluate all aspects of the model for each node in the +/// queue. But it's really done to make the heuristics easier to debug and +/// statistically analyze. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +/// \param Zone describes the scheduled zone that we are extending, or nullptr +/// if Cand is from a different zone than TryCand.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+                                    SchedCandidate &TryCand,
+                                    SchedBoundary *Zone) {
+
+  if (tryCandidate_RegPress(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Latency(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Clustered_Weak(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_RegPress2(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Resources(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Latency2_Order(Cand, TryCand, Zone))
+    return;
 }
 
 /// Pick the best candidate from the queue.
Index: lib/Target/SystemZ/SystemZMachineScheduler.h
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.h
+++ lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -142,6 +142,31 @@
   void releaseBottomNode(SUnit *SU) override {};
 };
 
+/// A pre-RA scheduling strategy for SystemZ. It reuses the GenericScheduler
+/// heuristics but overrides tryCandidate() to add a latency boost for
+/// instructions that use the vector unit.
+class SystemZPreRASchedStrategy : public GenericScheduler {
+  const SystemZSubtarget *ST;
+  const TargetInstrInfo *TII;
+  // Scheduling models initialized from the subtarget; tryCandidate() uses
+  // them to look up the processor resources of a candidate's sched class.
+  TargetSchedModel SchedModel;
+  MCSchedModel MCSchedM;
+
+public:
+  SystemZPreRASchedStrategy(const MachineSchedContext *C) :
+    GenericScheduler(C),
+    ST(&C->MF->getSubtarget<SystemZSubtarget>()),
+    TII(ST->getInstrInfo()) {
+    SchedModel.init(ST->getSchedModel(), ST, TII);
+    MCSchedM = ST->getSchedModel();
+  }
+
+  void tryCandidate(SchedCandidate &Cand,
+                    SchedCandidate &TryCand,
+                    SchedBoundary *Zone) override;
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -256,3 +256,59 @@
   // Put all released SUs in the Available set.
   Available.insert(SU);
 }
+
+
+//////////// Pre-RA scheduling
+
+/// Apply the generic pre-RA heuristics in their usual order, with an
+/// additional latency boost for instructions that use the vector unit.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+/// if Cand is from a different zone than TryCand.
+void SystemZPreRASchedStrategy::
+tryCandidate(SchedCandidate &Cand,
+             SchedCandidate &TryCand,
+             SchedBoundary *Zone) {
+  if (tryCandidate_RegPress(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Latency(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Clustered_Weak(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_RegPress2(Cand, TryCand, Zone))
+    return;
+
+  // Latency boost for instructions using the vector unit. tryLatency() needs
+  // a zone, so this is only done when both candidates are in the same zone.
+  // FIXME: Find a better way to determine if TryCand uses the vector unit.
+  if (Zone != nullptr && ST->hasVector()) {
+    bool VectorPipeline = false;
+    unsigned SchedClass =
+      TII->get(TryCand.SU->getInstr()->getOpcode()).getSchedClass();
+    const MCSchedClassDesc *SC = MCSchedM.getSchedClassDesc(SchedClass);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel.getWriteProcResBegin(SC),
+           PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+      unsigned PIdx = PI->ProcResourceIdx;
+      const MCProcResourceDesc &PRD = *SchedModel.getProcResource(PIdx);
+      std::string PRName(PRD.Name);
+      if (PRName.find("VecUnit") != std::string::npos) {
+        VectorPipeline = true;
+        break;
+      }
+    }
+    if (VectorPipeline && tryLatency(TryCand, Cand, *Zone))
+      return;
+  }
+
+  if (tryCandidate_Resources(Cand, TryCand, Zone))
+    return;
+
+  if (tryCandidate_Latency2_Order(Cand, TryCand, Zone))
+    return;
+}
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -167,6 +167,17 @@
   }
 
   ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    // To run the generic pre-RA scheduler use: -misched=converge
+    ScheduleDAGMILive *DAG =
+      new ScheduleDAGMILive(C, llvm::make_unique<SystemZPreRASchedStrategy>(C));
+
+    // Use same DAG mutators as are applied in createGenericSchedLive().
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); + return DAG; + } + + ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { return new ScheduleDAGMI(C, llvm::make_unique(C), Index: test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll =================================================================== --- test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll +++ test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll @@ -64,7 +64,7 @@ ; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27 ; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15) ; CHECK-DAG: vl [[REG6:%v[0-9]+]], 160(%r15) -; CHECK-DAG: vo [[REG7:%v[0-9]+]], %v2, [[REG4]] +; CHECK-DAG: vo [[REG7:%v[0-9]+]], [[REG1]], [[REG4]] ; CHECK-DAG: vo [[REG8:%v[0-9]+]], [[REG2]], [[REG3]] ; CHECK-DAG: vsel %v24, %v29, [[REG6]], [[REG8]] ; CHECK-DAG: vsel %v26, %v31, [[REG5]], [[REG7]] @@ -439,9 +439,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vceqg %v0, %v26, %v30 ; CHECK-NEXT: vceqg %v1, %v24, %v28 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vceqf %v1, %v25, %v27 -; CHECK-NEXT: vx %v0, %v0, %v1 +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vceqf [[REG0:%v[0-9]+]], %v25, %v27 +; CHECK-NEXT: vx %v0, %v0, [[REG0]] ; CHECK-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-NEXT: br %r14 %cmp0 = icmp eq <4 x i64> %val1, %val2 @@ -479,18 +479,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-NEXT: vpkg %v1, %v1, %v1 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf 
[[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v28, %v30 +; CHECK-DAG: vpkg [[REG2]], [[REG2]], [[REG2]] +; CHECK-NEXT: vo %v0, %v0, [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; @@ -514,26 +514,26 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vuphf %v0, %v0 -; CHECK-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vuphf %v0, %v0 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v28, %v30 +; CHECK-NEXT: vo %v0, %v0, [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun26: ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26 -; CHECK-Z14-NEXT: vuphf %v0, %v0 -; CHECK-Z14-NEXT: vfchdb %v1, %v28, %v30 +; CHECK-Z14-DAG: vuphf %v0, %v0 +; CHECK-Z14-DAG: vfchdb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vo %v0, %v0, %v1 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-Z14-NEXT: br %r14 @@ -585,8 +585,8 @@ ; CHECK-DAG: vmrhf [[REG17:%v[0-9]+]], %v30, %v30 ; CHECK-DAG: vldeb [[REG19:%v[0-9]+]], [[REG17]] ; CHECK-DAG: vldeb [[REG20:%v[0-9]+]], [[REG8]] -; CHECK-NEXT: vfchdb %v2, [[REG20]], [[REG19]] -; 
CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], %v2, [[REG16]] +; CHECK-NEXT: vfchdb [[REG22:%v[0-9]+]], [[REG20]], [[REG19]] +; CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], [[REG22]], [[REG16]] ; CHECK-NEXT: vx %v0, [[REG11]], [[REG21]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 @@ -610,30 +610,30 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vmrhf %v3, %v28, %v28 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v30, %v30 -; CHECK-NEXT: vmrlf %v2, %v28, %v28 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v30, %v30 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vmrhf [[REG2:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb [[REG3:%v[0-9]+]], [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, [[REG3]], %v0 +; CHECK-DAG: vmrlf %v1, %v30, %v30 +; CHECK-DAG: vmrlf [[REG4:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vldeb [[REG4]], [[REG4]] +; CHECK-DAG: vfchdb %v1, [[REG4]], %v1 +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v30, %v30 +; CHECK-NEXT: vldeb [[REG5]], [[REG5]] +; CHECK-NEXT: vldeb [[REG2]], [[REG2]] +; CHECK-NEXT: vfchdb [[REG6:%v[0-9]+]], [[REG2]], [[REG5]] +; CHECK-NEXT: vpkg %v1, [[REG6]], %v1 ; CHECK-NEXT: vx %v0, %v0, %v1 ; CHECK-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-NEXT: vuphf %v1, 
%v1 -; CHECK-NEXT: vuphf %v0, %v0 +; CHECK-DAG: vuphf %v1, %v1 +; CHECK-DAG: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-NEXT: vsel %v26, %v27, %v31, %v1 ; CHECK-NEXT: br %r14 @@ -644,8 +644,8 @@ ; CHECK-Z14-NEXT: vfchsb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vx %v0, %v0, %v1 ; CHECK-Z14-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-Z14-NEXT: vuphf %v1, %v1 -; CHECK-Z14-NEXT: vuphf %v0, %v0 +; CHECK-Z14-DAG: vuphf %v1, %v1 +; CHECK-Z14-DAG: vuphf %v0, %v0 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-Z14-NEXT: vsel %v26, %v27, %v31, %v1 ; CHECK-Z14-NEXT: br %r14 @@ -659,70 +659,70 @@ define <8 x float> @fun30(<8 x float> %val1, <8 x float> %val2, <8 x double> %val3, <8 x double> %val4, <8 x float> %val5, <8 x float> %val6) { ; CHECK-LABEL: fun30: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v16, %v28, %v28 -; CHECK-NEXT: vmrlf %v17, %v24, %v24 -; CHECK-NEXT: vldeb %v16, %v16 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vfchdb %v16, %v17, %v16 -; CHECK-NEXT: vmrhf %v17, %v28, %v28 -; CHECK-NEXT: vmrhf %v18, %v24, %v24 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vl %v4, 192(%r15) -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vl %v5, 208(%r15) -; CHECK-NEXT: vl %v6, 160(%r15) -; CHECK-NEXT: vl %v7, 176(%r15) -; CHECK-NEXT: vl %v0, 272(%r15) -; CHECK-NEXT: vl %v1, 240(%r15) -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vl %v2, 256(%r15) -; CHECK-NEXT: vl %v3, 224(%r15) -; CHECK-NEXT: vpkg %v16, %v17, %v16 -; CHECK-NEXT: vmrlf %v17, %v30, %v30 -; CHECK-NEXT: vmrlf %v18, %v26, %v26 -; CHECK-NEXT: vmrhf %v19, %v26, %v26 -; CHECK-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-NEXT: vpkg %v6, %v6, %v7 -; CHECK-NEXT: vpkg %v4, %v4, %v5 -; CHECK-NEXT: vn %v5, %v16, %v6 -; CHECK-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vmrhf %v18, %v30, %v30 -; 
CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vldeb %v19, %v19 -; CHECK-NEXT: vfchdb %v18, %v19, %v18 -; CHECK-NEXT: vpkg %v17, %v18, %v17 -; CHECK-NEXT: vn %v4, %v17, %v4 -; CHECK-NEXT: vsel %v26, %v1, %v0, %v4 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], [[REG1]], [[REG0]] +; CHECK-DAG: vmrhf %v17, %v28, %v28 +; CHECK-DAG: vmrhf %v18, %v24, %v24 +; CHECK-DAG: vldeb %v17, %v17 +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 192(%r15) +; CHECK-DAG: vldeb %v18, %v18 +; CHECK-DAG: vl [[REG4:%v[0-9]+]], 208(%r15) +; CHECK-DAG: vl [[REG5:%v[0-9]+]], 160(%r15) +; CHECK-DAG: vl [[REG6:%v[0-9]+]], 176(%r15) +; CHECK-DAG: vl [[REG7:%v[0-9]+]], 272(%r15) +; CHECK-DAG: vl [[REG8:%v[0-9]+]], 240(%r15) +; CHECK-DAG: vfchdb [[REG9:%v[0-9]+]], %v18, %v17 +; CHECK-DAG: vl [[REG10:%v[0-9]+]], 256(%r15) +; CHECK-DAG: vl [[REG11:%v[0-9]+]], 224(%r15) +; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG9]], [[REG2]] +; CHECK-DAG: vmrlf [[REG13:%v[0-9]+]], %v30, %v30 +; CHECK-DAG: vmrlf [[REG14:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG15:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v27, [[REG6]] +; CHECK-DAG: vfchdb [[REG17:%v[0-9]+]], %v25, [[REG5]] +; CHECK-DAG: vfchdb [[REG18:%v[0-9]+]], %v31, [[REG4]] +; CHECK-DAG: vfchdb [[REG19:%v[0-9]+]], %v29, [[REG3]] +; CHECK-DAG: vpkg [[REG20:%v[0-9]+]], [[REG17]], [[REG16]] +; CHECK-DAG: vpkg [[REG21:%v[0-9]+]], [[REG19]], [[REG18]] +; CHECK-DAG: vn [[REG22:%v[0-9]+]], [[REG12]], [[REG20]] +; CHECK-DAG: vsel %v24, [[REG11]], [[REG10]], [[REG22]] +; CHECK-DAG: vldeb [[REG13]], [[REG13]] +; CHECK-DAG: vldeb [[REG14]], [[REG14]] +; CHECK-DAG: vfchdb [[REG23:%v[0-9]+]], [[REG14]], [[REG13]] +; CHECK-DAG: vmrhf [[REG24:%v[0-9]+]], %v30, %v30 +; CHECK-DAG: vldeb [[REG24]], [[REG24]] +; CHECK-DAG: vldeb [[REG15]], [[REG15]] +; CHECK-DAG: vfchdb [[REG25:%v[0-9]+]], 
[[REG15]], [[REG24]] +; CHECK-DAG: vpkg [[REG26:%v[0-9]+]], [[REG25]], [[REG23]] +; CHECK-DAG: vn [[REG27:%v[0-9]+]], [[REG26]], [[REG21]] +; CHECK-DAG: vsel %v26, [[REG8]], [[REG7]], [[REG27]] ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun30: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vl %v4, 192(%r15) -; CHECK-Z14-NEXT: vl %v5, 208(%r15) -; CHECK-Z14-NEXT: vl %v6, 160(%r15) -; CHECK-Z14-NEXT: vl %v7, 176(%r15) -; CHECK-Z14-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-Z14-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-Z14-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-Z14-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-Z14-NEXT: vfchsb %v16, %v24, %v28 -; CHECK-Z14-NEXT: vfchsb %v17, %v26, %v30 -; CHECK-Z14-NEXT: vpkg %v6, %v6, %v7 -; CHECK-Z14-NEXT: vpkg %v4, %v4, %v5 +; CHECK-Z14-NEXT: vl [[REG0:%v[0-9]+]], 192(%r15) +; CHECK-Z14-NEXT: vl [[REG1:%v[0-9]+]], 208(%r15) +; CHECK-Z14-NEXT: vl [[REG2:%v[0-9]+]], 160(%r15) +; CHECK-Z14-NEXT: vl [[REG3:%v[0-9]+]], 176(%r15) +; CHECK-Z14-NEXT: vfchdb [[REG4:%v[0-9]+]], %v27, [[REG3]] +; CHECK-Z14-NEXT: vfchdb [[REG5:%v[0-9]+]], %v25, [[REG2]] +; CHECK-Z14-NEXT: vfchdb [[REG6:%v[0-9]+]], %v31, [[REG1]] +; CHECK-Z14-NEXT: vfchdb [[REG7:%v[0-9]+]], %v29, [[REG0]] +; CHECK-Z14-NEXT: vfchsb [[REG8:%v[0-9]+]], %v24, %v28 +; CHECK-Z14-NEXT: vfchsb [[REG9:%v[0-9]+]], %v26, %v30 +; CHECK-Z14-NEXT: vpkg [[REG10:%v[0-9]+]], [[REG5]], [[REG4]] +; CHECK-Z14-NEXT: vpkg [[REG11:%v[0-9]+]], [[REG7]], [[REG6]] ; CHECK-Z14-NEXT: vl %v0, 272(%r15) ; CHECK-Z14-NEXT: vl %v1, 240(%r15) ; CHECK-Z14-NEXT: vl %v2, 256(%r15) -; CHECK-Z14-NEXT: vl %v3, 224(%r15) -; CHECK-Z14-NEXT: vn %v4, %v17, %v4 -; CHECK-Z14-NEXT: vn %v5, %v16, %v6 -; CHECK-Z14-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v26, %v1, %v0, %v4 +; CHECK-Z14-NEXT: vl [[REG14:%v[0-9]+]], 224(%r15) +; CHECK-Z14-NEXT: vn [[REG12:%v[0-9]+]], [[REG9]], [[REG11]] +; CHECK-Z14-NEXT: vn [[REG13:%v[0-9]+]], [[REG8]], [[REG10]] +; CHECK-Z14-NEXT: vsel %v24, [[REG14]], %v2, [[REG13]] +; CHECK-Z14-NEXT: vsel %v26, %v1, 
%v0, [[REG12]] ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <8 x float> %val1, %val2 %cmp1 = fcmp ogt <8 x double> %val3, %val4 @@ -765,20 +765,20 @@ define <4 x float> @fun33(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x float> %val5, <4 x float> %val6) { ; CHECK-LABEL: fun33: ; CHECK: # %bb.0: -; CHECK-NEXT: vfchdb %v0, %v26, %v30 -; CHECK-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v27, %v27 -; CHECK-NEXT: vmrlf %v2, %v25, %v25 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v27, %v27 -; CHECK-NEXT: vmrhf %v3, %v25, %v25 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-DAG: vfchdb %v0, %v26, %v30 +; CHECK-DAG: vfchdb %v1, %v24, %v28 +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG2:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vldeb [[REG3:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG7:%v[0-9]+]], [[REG5]] +; CHECK-DAG: vldeb [[REG8:%v[0-9]+]], [[REG6]] +; CHECK-DAG: vfchdb %v2, [[REG8]], [[REG7]] +; CHECK-NEXT: vpkg %v1, %v2, [[REG4]] ; CHECK-NEXT: vn %v0, %v0, %v1 ; CHECK-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-NEXT: br %r14 @@ -787,9 +787,9 @@ ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchdb %v0, %v26, %v30 ; CHECK-Z14-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-Z14-NEXT: vpkg %v0, %v1, %v0 -; CHECK-Z14-NEXT: vfchsb %v1, %v25, %v27 -; CHECK-Z14-NEXT: vn %v0, %v0, %v1 +; CHECK-Z14-DAG: vpkg %v0, %v1, %v0 +; CHECK-Z14-DAG: vfchsb [[REG0:%v[0-9]+]], %v25, %v27 +; CHECK-Z14-NEXT: vn %v0, %v0, [[REG0]] ; CHECK-Z14-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-Z14-NEXT: br %r14 
%cmp0 = fcmp ogt <4 x double> %val1, %val2 @@ -802,13 +802,13 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x double> %val5, <4 x double> %val6) { ; CHECK-LABEL: fun34: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 -; CHECK-NEXT: vldeb [[REG2:%v[0-9]+]], [[REG0]] -; CHECK-NEXT: vldeb [[REG3:%v[0-9]+]], [[REG1]] -; CHECK-NEXT: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] -; CHECK-NEXT: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG2:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vldeb [[REG3:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 ; CHECK-DAG: vldeb [[REG7:%v[0-9]+]], [[REG5]] ; CHECK-DAG: vl [[REG8:%v[0-9]+]], 176(%r15) ; CHECK-DAG: vldeb [[REG9:%v[0-9]+]], [[REG6]] @@ -820,26 +820,26 @@ ; CHECK-NEXT: vfchdb [[REG15:%v[0-9]+]], %v24, %v28 ; CHECK-NEXT: vfchdb [[REG16:%v[0-9]+]], %v26, %v30 ; CHECK-NEXT: vuphf [[REG17:%v[0-9]+]], [[REG14]] -; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]] -; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] +; CHECK-DAG: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]] +; CHECK-DAG: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] ; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]] ; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]] ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun34: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27 -; CHECK-Z14-NEXT: vuphf %v5, %v4 -; CHECK-Z14-NEXT: vmrlg %v4, %v4, %v4 -; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28 -; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30 -; CHECK-Z14-NEXT: vuphf %v4, %v4 +; CHECK-Z14-NEXT: vfchsb [[REG0:%v[0-9]+]], %v25, %v27 +; 
CHECK-Z14-NEXT: vuphf [[REG1:%v[0-9]+]], [[REG0]] +; CHECK-Z14-NEXT: vmrlg [[REG0]], [[REG0]], [[REG0]] +; CHECK-Z14-NEXT: vfchdb [[REG2:%v[0-9]+]], %v24, %v28 +; CHECK-Z14-NEXT: vfchdb [[REG3:%v[0-9]+]], %v26, %v30 +; CHECK-Z14-NEXT: vuphf [[REG0]], [[REG0]] ; CHECK-Z14-NEXT: vl %v0, 176(%r15) -; CHECK-Z14-NEXT: vl %v1, 160(%r15) -; CHECK-Z14-NEXT: vn %v3, %v3, %v4 -; CHECK-Z14-NEXT: vn %v2, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2 -; CHECK-Z14-NEXT: vsel %v26, %v31, %v0, %v3 +; CHECK-Z14-NEXT: vl [[REG4:%v[0-9]+]], 160(%r15) +; CHECK-Z14-DAG: vn [[REG5:%v[0-9]+]], [[REG3]], [[REG0]] +; CHECK-Z14-DAG: vn [[REG6:%v[0-9]+]], [[REG2]], [[REG1]] +; CHECK-Z14-NEXT: vsel %v24, %v29, [[REG4]], [[REG6]] +; CHECK-Z14-NEXT: vsel %v26, %v31, %v0, [[REG5]] ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <4 x double> %val1, %val2 %cmp1 = fcmp ogt <4 x float> %val3, %val4 Index: test/CodeGen/SystemZ/vec-cmpsel.ll =================================================================== --- test/CodeGen/SystemZ/vec-cmpsel.ll +++ test/CodeGen/SystemZ/vec-cmpsel.ll @@ -316,17 +316,17 @@ define <2 x float> @fun25(<2 x float> %val1, <2 x float> %val2, <2 x float> %val3, <2 x float> %val4) { ; CHECK-LABEL: fun25: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v0, %v26, %v26 -; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-DAG: vmrlf %v0, %v26, %v26 +; CHECK-DAG: vmrlf %v1, %v24, %v24 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb [[REG0:%v[0-9]+]], %v1, %v0 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG2:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vldeb [[REG2]], [[REG2]] +; CHECK-DAG: vfchdb 
[[REG3:%v[0-9]+]], [[REG2]], [[REG1]] +; CHECK-NEXT: vpkg %v0, [[REG3]], [[REG0]] ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -344,16 +344,16 @@ define <2 x double> @fun26(<2 x float> %val1, <2 x float> %val2, <2 x double> %val3, <2 x double> %val4) { ; CHECK-LABEL: fun26: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v0, %v26, %v26 -; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-DAG: vmrlf %v0, %v26, %v26 +; CHECK-DAG: vmrlf %v1, %v24, %v24 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 @@ -391,14 +391,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -419,15 +419,15 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, 
%v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg [[REG0:%v[0-9]+]], %v1, %v0 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg [[REG0:%v[0-9]+]], %v1, %v0 ; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]] ; CHECK-DAG: vuphf [[REG1]], [[REG1]] ; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG0]] Index: test/CodeGen/SystemZ/vec-ctpop-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-ctpop-01.ll +++ test/CodeGen/SystemZ/vec-ctpop-01.ll @@ -30,8 +30,8 @@ define <4 x i32> @f3(<4 x i32> %a) { ; CHECK-LABEL: f3: -; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -; CHECK: vgbm [[T2:%v[0-9]+]], 0 +; CHECK-DAG: vpopct [[T1:%v[0-9]+]], %v24, 0 +; CHECK-DAG: vgbm [[T2:%v[0-9]+]], 0 ; CHECK: vsumb %v24, [[T1]], [[T2]] ; CHECK: br %r14 @@ -41,8 +41,8 @@ define <2 x i64> @f4(<2 x i64> %a) { ; CHECK-LABEL: f4: -; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -; CHECK: vgbm [[T2:%v[0-9]+]], 0 +; CHECK-DAG: vpopct [[T1:%v[0-9]+]], %v24, 0 +; CHECK-DAG: vgbm [[T2:%v[0-9]+]], 0 ; CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]] ; CHECK: vsumgf %v24, [[T3]], [[T2]] ; CHECK: br %r14