Index: lib/Target/SystemZ/SystemZMachineScheduler.h
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.h
+++ lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -151,6 +151,23 @@
   void releaseBottomNode(SUnit *SU) override {};
 };
 
+/// A pre-RA scheduling strategy that extends GenericScheduler with a latency
+/// boost for instructions using the vector unit.
+class SystemZPreRASchedStrategy : public GenericScheduler {
+  const SystemZSubtarget *ST;
+
+  // The VectorUnit index is 6 for both z13 and z14.
+  static constexpr unsigned SystemZVectorUnitIdx = 6;
+
+public:
+  SystemZPreRASchedStrategy(const MachineSchedContext *C) :
+    GenericScheduler(C), ST(&C->MF->getSubtarget<SystemZSubtarget>()) {}
+
+  void tryCandidate(SchedCandidate &Cand,
+                    SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -265,3 +265,179 @@
   // Put all released SUs in the Available set.
   Available.insert(SU);
 }
+
+
+//////////// Pre-RA scheduling
+
+// EXPERIMENTAL: command line switches controlling the vector unit latency
+// heuristic in SystemZPreRASchedStrategy::tryCandidate().
+#include "llvm/Support/CommandLine.h"
+
+// If set, the default heuristic is skipped in regions that are acyclic
+// latency limited.
+static cl::opt<bool> CHECK_ACYC("check-acyc", cl::Hidden, cl::init(false));
+
+// If set, the alternative heuristic is used, which also checks whether the
+// current best candidate uses the vector unit.
+static cl::opt<bool> VEC_LAT("vec-lat", cl::Hidden, cl::init(false));
+
+// This is mostly copied from MachineScheduler.cpp.
+//
+// Compare TryCand against Cand, the best candidate found so far, using a
+// prioritized list of heuristics. Zone is the boundary that both candidates
+// belong to, or null if they come from different boundaries, in which case
+// only a subset of the heuristics is applied.
+void SystemZPreRASchedStrategy::
+tryCandidate(SchedCandidate &Cand,
+             SchedCandidate &TryCand,
+             SchedBoundary *Zone) const {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
+  // Avoid exceeding the target's limit.
+  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
+                                               Cand.RPDelta.Excess,
+                                               TryCand, Cand, RegExcess, TRI,
+                                               DAG->MF))
+    return;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+                                               Cand.RPDelta.CriticalMax,
+                                               TryCand, Cand, RegCritical, TRI,
+                                               DAG->MF))
+    return;
+
+  // We only compare a subset of features when comparing nodes between
+  // Top and Bottom boundary. Some properties are simply incomparable, in many
+  // other instances we should only override the other boundary if something
+  // is a clear good pick on one boundary. Skip heuristics that are more
+  // "tie-breaking" in nature.
+  bool SameBoundary = Zone != nullptr;
+  if (SameBoundary) {
+    // For loops that are acyclic path limited, aggressively schedule for
+    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+    // heuristics to take precedence.
+    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+        tryLatency(TryCand, Cand, *Zone))
+      return;
+
+    // Prioritize instructions that read unbuffered resources by stall cycles.
+    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+      return;
+  }
+
+  // Keep clustered nodes together to encourage downstream peephole
+  // optimizations which may reduce resource requirements.
+  //
+  // This is a best effort to set things up for a post-RA pass. Optimizations
+  // like generating loads of multiple registers should ideally be done within
+  // the scheduler pass by combining the loads during DAG postprocessing.
+  const SUnit *CandNextClusterSU =
+    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  const SUnit *TryCandNextClusterSU =
+    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+                 Cand.SU == CandNextClusterSU,
+                 TryCand, Cand, Cluster))
+    return;
+
+  if (SameBoundary) {
+    // Weak edges are for clustering and other constraints.
+    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                getWeakLeft(Cand.SU, Cand.AtTop),
+                TryCand, Cand, Weak))
+      return;
+  }
+
+  // Avoid increasing the max pressure of the entire region.
+  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
+                                               Cand.RPDelta.CurrentMax,
+                                               TryCand, Cand, RegMax, TRI,
+                                               DAG->MF))
+    return;
+
+  // SystemZ specific: Latency boost for instructions using the vector unit.
+  // tryLatency() is handed *Zone, so this is only done when both candidates
+  // belong to the same boundary (Zone is null otherwise). The hard coded
+  // resource index is only known to be right for subtargets with the vector
+  // facility, so both variants of the heuristic require it.
+  if (SameBoundary && ST->hasVector()) {
+    assert(std::string(SchedModel->getProcResource(SystemZVectorUnitIdx)->Name)
+                   .find("VecUnit") != std::string::npos &&
+           "Hard coded index for vector unit changed!");
+
+    // Returns true if the scheduling class of SU writes the vector unit
+    // processor resource.
+    auto UsesVectorUnit = [this](SUnit *SU) -> bool {
+      const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+      for (TargetSchedModel::ProcResIter
+             PI = SchedModel->getWriteProcResBegin(SC),
+             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
+        if (PI->ProcResourceIdx == SystemZVectorUnitIdx)
+          return true;
+      return false;
+    };
+
+    if (!VEC_LAT) {
+      // Default variant: compare latency whenever TryCand uses the vector
+      // unit, except in acyclic latency limited regions with -check-acyc.
+      bool Enabled = !Rem.IsAcyclicLatencyLimited || !CHECK_ACYC;
+      if (Enabled && UsesVectorUnit(TryCand.SU) &&
+          tryLatency(TryCand, Cand, *Zone))
+        return;
+    } else {
+      // Alternative variant: as above, but a decision that changed
+      // Cand.Reason is only kept if Cand uses the vector unit as well.
+      bool VecPipe_TryC = UsesVectorUnit(TryCand.SU);
+      bool VecPipe_Cand = UsesVectorUnit(Cand.SU);
+      CandReason Reason_Cand = Cand.Reason;
+      CandReason Reason_TryCand = TryCand.Reason;
+      if (VecPipe_TryC && tryLatency(TryCand, Cand, *Zone)) {
+        if (Reason_Cand != Cand.Reason) {
+          // Undo the update of Cand.Reason and go on with the remaining
+          // heuristics if Cand does not use the vector unit.
+          if (!VecPipe_Cand)
+            Cand.Reason = Reason_Cand;
+          else
+            return;
+        } else if (Reason_TryCand != TryCand.Reason) {
+          return;
+        }
+      }
+    }
+  }
+
+  if (SameBoundary) {
+    // Avoid critical resource consumption and balance the schedule.
+    TryCand.initResourceDelta(DAG, SchedModel);
+    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+                TryCand, Cand, ResourceReduce))
+      return;
+    if (tryGreater(TryCand.ResDelta.DemandedResources,
+                   Cand.ResDelta.DemandedResources,
+                   TryCand, Cand, ResourceDemand))
+      return;
+
+    // Avoid serializing long latency dependence chains.
+    // For acyclic path limited loops, latency was already checked above.
+    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+      return;
+
+    // Fall through to original instruction order.
+    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+      TryCand.Reason = NodeOrder;
+    }
+  }
+}
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -167,6 +167,17 @@
   }
 
   ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    // To run the generic pre-RA scheduler use: -misched=converge
+    ScheduleDAGMILive *DAG =
+      new ScheduleDAGMILive(C, llvm::make_unique<SystemZPreRASchedStrategy>(C));
+
+    // Use same DAG mutators as are applied in createGenericSchedLive().
+    DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+    return DAG;
+  }
+
+  ScheduleDAGInstrs *
   createPostMachineScheduler(MachineSchedContext *C) const override {
     return new ScheduleDAGMI(C,
                              llvm::make_unique<SystemZPostRASchedStrategy>(C),
Index: test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
===================================================================
--- test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
+++ test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
@@ -64,7 +64,7 @@
 ; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27
 ; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15)
 ; CHECK-DAG: vl [[REG6:%v[0-9]+]], 160(%r15)
-; CHECK-DAG: vo [[REG7:%v[0-9]+]], %v2, [[REG4]]
+; CHECK-DAG: vo [[REG7:%v[0-9]+]], [[REG1]], [[REG4]]
 ; CHECK-DAG: vo [[REG8:%v[0-9]+]], [[REG2]], [[REG3]]
 ; CHECK-DAG: vsel %v24, %v29, [[REG6]], [[REG8]]
 ; CHECK-DAG: vsel %v26, %v31, [[REG5]], [[REG7]]
@@ -439,9 +439,9 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vceqg %v0, %v26, %v30
 ; CHECK-NEXT: vceqg %v1, %v24, %v28
-; CHECK-NEXT: vpkg %v0, %v1, %v0
-; CHECK-NEXT: vceqf %v1, %v25, %v27
-; CHECK-NEXT: vx %v0, %v0, %v1
+; CHECK-DAG: vpkg %v0, %v1, %v0
+; CHECK-DAG: vceqf [[REG0:%v[0-9]+]], %v25, %v27
+; CHECK-NEXT: vx %v0, %v0, [[REG0]]
 ; CHECK-NEXT: vsel %v24, %v29, %v31, %v0
 ; CHECK-NEXT: br %r14
  %cmp0 = icmp eq <4 x i64> %val1, %val2
@@ -479,18 +479,18 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmrlf %v0, %v26, %v26
 ; CHECK-NEXT: vmrlf %v1, %v24, %v24
-; CHECK-NEXT: vldeb %v0, %v0
-; CHECK-NEXT: vldeb %v1, %v1
-; CHECK-NEXT: vfchdb %v0, %v1, %v0
-; CHECK-NEXT: vmrhf %v1, %v26, %v26
-; CHECK-NEXT: vmrhf %v2, %v24, %v24
-; CHECK-NEXT: vldeb %v1, %v1
-; CHECK-NEXT: vldeb %v2, %v2
-; CHECK-NEXT: vfchdb %v1, %v2, %v1
-; CHECK-NEXT: vpkg %v0, %v1, %v0
-; CHECK-NEXT: vfchdb %v1, %v28, %v30
-; CHECK-NEXT: vpkg %v1, %v1, %v1
-; CHECK-NEXT: vo %v0, %v0, %v1
+; CHECK-DAG: vldeb %v0, %v0
+; CHECK-DAG: vldeb %v1, %v1
+; CHECK-DAG: vfchdb %v0, %v1, %v0
+; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrhf
[[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v28, %v30 +; CHECK-DAG: vpkg [[REG2]], [[REG2]], [[REG2]] +; CHECK-NEXT: vo %v0, %v0, [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; @@ -514,26 +514,26 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vuphf %v0, %v0 -; CHECK-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vuphf %v0, %v0 +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v28, %v30 +; CHECK-NEXT: vo %v0, %v0, [[REG2]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun26: ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26 -; CHECK-Z14-NEXT: vuphf %v0, %v0 -; CHECK-Z14-NEXT: vfchdb %v1, %v28, %v30 +; CHECK-Z14-DAG: vuphf %v0, %v0 +; CHECK-Z14-DAG: vfchdb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vo %v0, %v0, %v1 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-Z14-NEXT: br %r14 @@ -585,8 +585,8 @@ ; CHECK-DAG: vmrhf [[REG17:%v[0-9]+]], %v30, %v30 ; CHECK-DAG: vldeb [[REG19:%v[0-9]+]], [[REG17]] ; CHECK-DAG: vldeb [[REG20:%v[0-9]+]], [[REG8]] -; CHECK-NEXT: vfchdb %v2, [[REG20]], [[REG19]] -; 
CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], %v2, [[REG16]] +; CHECK-NEXT: vfchdb [[REG22:%v[0-9]+]], [[REG20]], [[REG19]] +; CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], [[REG22]], [[REG16]] ; CHECK-NEXT: vx %v0, [[REG11]], [[REG21]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 @@ -610,30 +610,30 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vmrhf %v3, %v28, %v28 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v30, %v30 -; CHECK-NEXT: vmrlf %v2, %v28, %v28 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v30, %v30 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vmrhf [[REG2:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb [[REG3:%v[0-9]+]], [[REG1]], [[REG0]] +; CHECK-DAG: vpkg %v0, [[REG3]], %v0 +; CHECK-DAG: vmrlf %v1, %v30, %v30 +; CHECK-DAG: vmrlf [[REG4:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vldeb [[REG4]], [[REG4]] +; CHECK-DAG: vfchdb %v1, [[REG4]], %v1 +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v30, %v30 +; CHECK-NEXT: vldeb [[REG5]], [[REG5]] +; CHECK-NEXT: vldeb [[REG2]], [[REG2]] +; CHECK-NEXT: vfchdb [[REG6:%v[0-9]+]], [[REG2]], [[REG5]] +; CHECK-NEXT: vpkg %v1, [[REG6]], %v1 ; CHECK-NEXT: vx %v0, %v0, %v1 ; CHECK-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-NEXT: vuphf %v1, 
%v1 -; CHECK-NEXT: vuphf %v0, %v0 +; CHECK-DAG: vuphf %v1, %v1 +; CHECK-DAG: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-NEXT: vsel %v26, %v27, %v31, %v1 ; CHECK-NEXT: br %r14 @@ -644,8 +644,8 @@ ; CHECK-Z14-NEXT: vfchsb %v1, %v28, %v30 ; CHECK-Z14-NEXT: vx %v0, %v0, %v1 ; CHECK-Z14-NEXT: vmrlg %v1, %v0, %v0 -; CHECK-Z14-NEXT: vuphf %v1, %v1 -; CHECK-Z14-NEXT: vuphf %v0, %v0 +; CHECK-Z14-DAG: vuphf %v1, %v1 +; CHECK-Z14-DAG: vuphf %v0, %v0 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v29, %v0 ; CHECK-Z14-NEXT: vsel %v26, %v27, %v31, %v1 ; CHECK-Z14-NEXT: br %r14 @@ -659,70 +659,70 @@ define <8 x float> @fun30(<8 x float> %val1, <8 x float> %val2, <8 x double> %val3, <8 x double> %val4, <8 x float> %val5, <8 x float> %val6) { ; CHECK-LABEL: fun30: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v16, %v28, %v28 -; CHECK-NEXT: vmrlf %v17, %v24, %v24 -; CHECK-NEXT: vldeb %v16, %v16 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vfchdb %v16, %v17, %v16 -; CHECK-NEXT: vmrhf %v17, %v28, %v28 -; CHECK-NEXT: vmrhf %v18, %v24, %v24 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vl %v4, 192(%r15) -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vl %v5, 208(%r15) -; CHECK-NEXT: vl %v6, 160(%r15) -; CHECK-NEXT: vl %v7, 176(%r15) -; CHECK-NEXT: vl %v0, 272(%r15) -; CHECK-NEXT: vl %v1, 240(%r15) -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vl %v2, 256(%r15) -; CHECK-NEXT: vl %v3, 224(%r15) -; CHECK-NEXT: vpkg %v16, %v17, %v16 -; CHECK-NEXT: vmrlf %v17, %v30, %v30 -; CHECK-NEXT: vmrlf %v18, %v26, %v26 -; CHECK-NEXT: vmrhf %v19, %v26, %v26 -; CHECK-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-NEXT: vpkg %v6, %v6, %v7 -; CHECK-NEXT: vpkg %v4, %v4, %v5 -; CHECK-NEXT: vn %v5, %v16, %v6 -; CHECK-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vmrhf %v18, %v30, %v30 -; 
CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vldeb %v19, %v19 -; CHECK-NEXT: vfchdb %v18, %v19, %v18 -; CHECK-NEXT: vpkg %v17, %v18, %v17 -; CHECK-NEXT: vn %v4, %v17, %v4 -; CHECK-NEXT: vsel %v26, %v1, %v0, %v4 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v28, %v28 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], [[REG1]], [[REG0]] +; CHECK-DAG: vmrhf %v17, %v28, %v28 +; CHECK-DAG: vmrhf %v18, %v24, %v24 +; CHECK-DAG: vldeb %v17, %v17 +; CHECK-DAG: vl [[REG3:%v[0-9]+]], 192(%r15) +; CHECK-DAG: vldeb %v18, %v18 +; CHECK-DAG: vl [[REG4:%v[0-9]+]], 208(%r15) +; CHECK-DAG: vl [[REG5:%v[0-9]+]], 160(%r15) +; CHECK-DAG: vl [[REG6:%v[0-9]+]], 176(%r15) +; CHECK-DAG: vl [[REG7:%v[0-9]+]], 272(%r15) +; CHECK-DAG: vl [[REG8:%v[0-9]+]], 240(%r15) +; CHECK-DAG: vfchdb [[REG9:%v[0-9]+]], %v18, %v17 +; CHECK-DAG: vl [[REG10:%v[0-9]+]], 256(%r15) +; CHECK-DAG: vl [[REG11:%v[0-9]+]], 224(%r15) +; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG9]], [[REG2]] +; CHECK-DAG: vmrlf [[REG13:%v[0-9]+]], %v30, %v30 +; CHECK-DAG: vmrlf [[REG14:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG15:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v27, [[REG6]] +; CHECK-DAG: vfchdb [[REG17:%v[0-9]+]], %v25, [[REG5]] +; CHECK-DAG: vfchdb [[REG18:%v[0-9]+]], %v31, [[REG4]] +; CHECK-DAG: vfchdb [[REG19:%v[0-9]+]], %v29, [[REG3]] +; CHECK-DAG: vpkg [[REG20:%v[0-9]+]], [[REG17]], [[REG16]] +; CHECK-DAG: vpkg [[REG21:%v[0-9]+]], [[REG19]], [[REG18]] +; CHECK-DAG: vn [[REG22:%v[0-9]+]], [[REG12]], [[REG20]] +; CHECK-DAG: vsel %v24, [[REG11]], [[REG10]], [[REG22]] +; CHECK-DAG: vldeb [[REG13]], [[REG13]] +; CHECK-DAG: vldeb [[REG14]], [[REG14]] +; CHECK-DAG: vfchdb [[REG23:%v[0-9]+]], [[REG14]], [[REG13]] +; CHECK-DAG: vmrhf [[REG24:%v[0-9]+]], %v30, %v30 +; CHECK-DAG: vldeb [[REG24]], [[REG24]] +; CHECK-DAG: vldeb [[REG15]], [[REG15]] +; CHECK-DAG: vfchdb [[REG25:%v[0-9]+]], 
[[REG15]], [[REG24]] +; CHECK-DAG: vpkg [[REG26:%v[0-9]+]], [[REG25]], [[REG23]] +; CHECK-DAG: vn [[REG27:%v[0-9]+]], [[REG26]], [[REG21]] +; CHECK-DAG: vsel %v26, [[REG8]], [[REG7]], [[REG27]] ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun30: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vl %v4, 192(%r15) -; CHECK-Z14-NEXT: vl %v5, 208(%r15) -; CHECK-Z14-NEXT: vl %v6, 160(%r15) -; CHECK-Z14-NEXT: vl %v7, 176(%r15) -; CHECK-Z14-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-Z14-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-Z14-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-Z14-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-Z14-NEXT: vfchsb %v16, %v24, %v28 -; CHECK-Z14-NEXT: vfchsb %v17, %v26, %v30 -; CHECK-Z14-NEXT: vpkg %v6, %v6, %v7 -; CHECK-Z14-NEXT: vpkg %v4, %v4, %v5 +; CHECK-Z14-NEXT: vl [[REG0:%v[0-9]+]], 192(%r15) +; CHECK-Z14-NEXT: vl [[REG1:%v[0-9]+]], 208(%r15) +; CHECK-Z14-NEXT: vl [[REG2:%v[0-9]+]], 160(%r15) +; CHECK-Z14-NEXT: vl [[REG3:%v[0-9]+]], 176(%r15) +; CHECK-Z14-NEXT: vfchdb [[REG4:%v[0-9]+]], %v27, [[REG3]] +; CHECK-Z14-NEXT: vfchdb [[REG5:%v[0-9]+]], %v25, [[REG2]] +; CHECK-Z14-NEXT: vfchdb [[REG6:%v[0-9]+]], %v31, [[REG1]] +; CHECK-Z14-NEXT: vfchdb [[REG7:%v[0-9]+]], %v29, [[REG0]] +; CHECK-Z14-NEXT: vfchsb [[REG8:%v[0-9]+]], %v24, %v28 +; CHECK-Z14-NEXT: vfchsb [[REG9:%v[0-9]+]], %v26, %v30 +; CHECK-Z14-NEXT: vpkg [[REG10:%v[0-9]+]], [[REG5]], [[REG4]] +; CHECK-Z14-NEXT: vpkg [[REG11:%v[0-9]+]], [[REG7]], [[REG6]] ; CHECK-Z14-NEXT: vl %v0, 272(%r15) ; CHECK-Z14-NEXT: vl %v1, 240(%r15) ; CHECK-Z14-NEXT: vl %v2, 256(%r15) -; CHECK-Z14-NEXT: vl %v3, 224(%r15) -; CHECK-Z14-NEXT: vn %v4, %v17, %v4 -; CHECK-Z14-NEXT: vn %v5, %v16, %v6 -; CHECK-Z14-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v26, %v1, %v0, %v4 +; CHECK-Z14-NEXT: vl [[REG14:%v[0-9]+]], 224(%r15) +; CHECK-Z14-NEXT: vn [[REG12:%v[0-9]+]], [[REG9]], [[REG11]] +; CHECK-Z14-NEXT: vn [[REG13:%v[0-9]+]], [[REG8]], [[REG10]] +; CHECK-Z14-NEXT: vsel %v24, [[REG14]], %v2, [[REG13]] +; CHECK-Z14-NEXT: vsel %v26, %v1, 
%v0, [[REG12]] ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <8 x float> %val1, %val2 %cmp1 = fcmp ogt <8 x double> %val3, %val4 @@ -765,20 +765,20 @@ define <4 x float> @fun33(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x float> %val5, <4 x float> %val6) { ; CHECK-LABEL: fun33: ; CHECK: # %bb.0: -; CHECK-NEXT: vfchdb %v0, %v26, %v30 -; CHECK-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v27, %v27 -; CHECK-NEXT: vmrlf %v2, %v25, %v25 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v27, %v27 -; CHECK-NEXT: vmrhf %v3, %v25, %v25 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-DAG: vfchdb %v0, %v26, %v30 +; CHECK-DAG: vfchdb %v1, %v24, %v28 +; CHECK-DAG: vpkg %v0, %v1, %v0 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG2:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vldeb [[REG3:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG7:%v[0-9]+]], [[REG5]] +; CHECK-DAG: vldeb [[REG8:%v[0-9]+]], [[REG6]] +; CHECK-DAG: vfchdb %v2, [[REG8]], [[REG7]] +; CHECK-NEXT: vpkg %v1, %v2, [[REG4]] ; CHECK-NEXT: vn %v0, %v0, %v1 ; CHECK-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-NEXT: br %r14 @@ -787,9 +787,9 @@ ; CHECK-Z14: # %bb.0: ; CHECK-Z14-NEXT: vfchdb %v0, %v26, %v30 ; CHECK-Z14-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-Z14-NEXT: vpkg %v0, %v1, %v0 -; CHECK-Z14-NEXT: vfchsb %v1, %v25, %v27 -; CHECK-Z14-NEXT: vn %v0, %v0, %v1 +; CHECK-Z14-DAG: vpkg %v0, %v1, %v0 +; CHECK-Z14-DAG: vfchsb [[REG0:%v[0-9]+]], %v25, %v27 +; CHECK-Z14-NEXT: vn %v0, %v0, [[REG0]] ; CHECK-Z14-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-Z14-NEXT: br %r14 
%cmp0 = fcmp ogt <4 x double> %val1, %val2 @@ -802,13 +802,13 @@ define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x double> %val5, <4 x double> %val6) { ; CHECK-LABEL: fun34: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 -; CHECK-NEXT: vldeb [[REG2:%v[0-9]+]], [[REG0]] -; CHECK-NEXT: vldeb [[REG3:%v[0-9]+]], [[REG1]] -; CHECK-NEXT: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] -; CHECK-NEXT: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 +; CHECK-DAG: vldeb [[REG2:%v[0-9]+]], [[REG0]] +; CHECK-DAG: vldeb [[REG3:%v[0-9]+]], [[REG1]] +; CHECK-DAG: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] +; CHECK-DAG: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 +; CHECK-DAG: vmrhf [[REG6:%v[0-9]+]], %v25, %v25 ; CHECK-DAG: vldeb [[REG7:%v[0-9]+]], [[REG5]] ; CHECK-DAG: vl [[REG8:%v[0-9]+]], 176(%r15) ; CHECK-DAG: vldeb [[REG9:%v[0-9]+]], [[REG6]] @@ -820,26 +820,26 @@ ; CHECK-NEXT: vfchdb [[REG15:%v[0-9]+]], %v24, %v28 ; CHECK-NEXT: vfchdb [[REG16:%v[0-9]+]], %v26, %v30 ; CHECK-NEXT: vuphf [[REG17:%v[0-9]+]], [[REG14]] -; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]] -; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] +; CHECK-DAG: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG17]] +; CHECK-DAG: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] ; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]] ; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]] ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun34: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27 -; CHECK-Z14-NEXT: vuphf %v5, %v4 -; CHECK-Z14-NEXT: vmrlg %v4, %v4, %v4 -; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28 -; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30 -; CHECK-Z14-NEXT: vuphf %v4, %v4 +; CHECK-Z14-NEXT: vfchsb [[REG0:%v[0-9]+]], %v25, %v27 +; 
CHECK-Z14-NEXT: vuphf [[REG1:%v[0-9]+]], [[REG0]] +; CHECK-Z14-NEXT: vmrlg [[REG0]], [[REG0]], [[REG0]] +; CHECK-Z14-NEXT: vfchdb [[REG2:%v[0-9]+]], %v24, %v28 +; CHECK-Z14-NEXT: vfchdb [[REG3:%v[0-9]+]], %v26, %v30 +; CHECK-Z14-NEXT: vuphf [[REG0]], [[REG0]] ; CHECK-Z14-NEXT: vl %v0, 176(%r15) -; CHECK-Z14-NEXT: vl %v1, 160(%r15) -; CHECK-Z14-NEXT: vn %v3, %v3, %v4 -; CHECK-Z14-NEXT: vn %v2, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2 -; CHECK-Z14-NEXT: vsel %v26, %v31, %v0, %v3 +; CHECK-Z14-NEXT: vl [[REG4:%v[0-9]+]], 160(%r15) +; CHECK-Z14-DAG: vn [[REG5:%v[0-9]+]], [[REG3]], [[REG0]] +; CHECK-Z14-DAG: vn [[REG6:%v[0-9]+]], [[REG2]], [[REG1]] +; CHECK-Z14-NEXT: vsel %v24, %v29, [[REG4]], [[REG6]] +; CHECK-Z14-NEXT: vsel %v26, %v31, %v0, [[REG5]] ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <4 x double> %val1, %val2 %cmp1 = fcmp ogt <4 x float> %val3, %val4 Index: test/CodeGen/SystemZ/vec-cmpsel.ll =================================================================== --- test/CodeGen/SystemZ/vec-cmpsel.ll +++ test/CodeGen/SystemZ/vec-cmpsel.ll @@ -316,17 +316,17 @@ define <2 x float> @fun25(<2 x float> %val1, <2 x float> %val2, <2 x float> %val3, <2 x float> %val4) { ; CHECK-LABEL: fun25: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v0, %v26, %v26 -; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-DAG: vmrlf %v0, %v26, %v26 +; CHECK-DAG: vmrlf %v1, %v24, %v24 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb [[REG0:%v[0-9]+]], %v1, %v0 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG2:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vldeb [[REG2]], [[REG2]] +; CHECK-DAG: vfchdb 
[[REG3:%v[0-9]+]], [[REG2]], [[REG1]] +; CHECK-NEXT: vpkg %v0, [[REG3]], [[REG0]] ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -344,16 +344,16 @@ define <2 x double> @fun26(<2 x float> %val1, <2 x float> %val2, <2 x double> %val3, <2 x double> %val4) { ; CHECK-LABEL: fun26: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v0, %v26, %v26 -; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-DAG: vmrlf %v0, %v26, %v26 +; CHECK-DAG: vmrlf %v1, %v24, %v24 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 @@ -391,14 +391,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -419,15 +419,15 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, 
%v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 -; CHECK-NEXT: vldeb %v0, %v0 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg [[REG0:%v[0-9]+]], %v1, %v0 +; CHECK-DAG: vldeb %v0, %v0 +; CHECK-DAG: vldeb %v1, %v1 +; CHECK-DAG: vfchdb %v0, %v1, %v0 +; CHECK-DAG: vmrhf [[REG0:%v[0-9]+]], %v26, %v26 +; CHECK-DAG: vmrhf [[REG1:%v[0-9]+]], %v24, %v24 +; CHECK-DAG: vldeb [[REG0]], [[REG0]] +; CHECK-DAG: vldeb [[REG1]], [[REG1]] +; CHECK-DAG: vfchdb %v1, [[REG1]], [[REG0]] +; CHECK-DAG: vpkg [[REG0:%v[0-9]+]], %v1, %v0 ; CHECK-DAG: vmrlg [[REG1:%v[0-9]+]], [[REG0]], [[REG0]] ; CHECK-DAG: vuphf [[REG1]], [[REG1]] ; CHECK-DAG: vuphf [[REG2:%v[0-9]+]], [[REG0]] Index: test/CodeGen/SystemZ/vec-ctpop-01.ll =================================================================== --- test/CodeGen/SystemZ/vec-ctpop-01.ll +++ test/CodeGen/SystemZ/vec-ctpop-01.ll @@ -30,8 +30,8 @@ define <4 x i32> @f3(<4 x i32> %a) { ; CHECK-LABEL: f3: -; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -; CHECK: vgbm [[T2:%v[0-9]+]], 0 +; CHECK-DAG: vpopct [[T1:%v[0-9]+]], %v24, 0 +; CHECK-DAG: vgbm [[T2:%v[0-9]+]], 0 ; CHECK: vsumb %v24, [[T1]], [[T2]] ; CHECK: br %r14 @@ -41,8 +41,8 @@ define <2 x i64> @f4(<2 x i64> %a) { ; CHECK-LABEL: f4: -; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -; CHECK: vgbm [[T2:%v[0-9]+]], 0 +; CHECK-DAG: vpopct [[T1:%v[0-9]+]], %v24, 0 +; CHECK-DAG: vgbm [[T2:%v[0-9]+]], 0 ; CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]] ; CHECK: vsumgf %v24, [[T3]], [[T2]] ; CHECK: br %r14