Index: llvm/lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- llvm/lib/CodeGen/TargetInstrInfo.cpp +++ llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -990,7 +990,7 @@ // Don't allow any virtual-register uses. Rematting an instruction with // virtual register uses would length the live ranges of the uses, which // is not necessarily a good idea, certainly not "trivial". - if (MO.isUse()) + if (MO.isUse() && !MO.isUndef()) return false; } Index: llvm/lib/Target/SystemZ/CMakeLists.txt =================================================================== --- llvm/lib/Target/SystemZ/CMakeLists.txt +++ llvm/lib/Target/SystemZ/CMakeLists.txt @@ -19,6 +19,7 @@ SystemZCallingConv.cpp SystemZConstantPoolValue.cpp SystemZCopyPhysRegs.cpp + SystemZDomainReassignment.cpp SystemZElimCompare.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp Index: llvm/lib/Target/SystemZ/SystemZ.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZ.h +++ llvm/lib/Target/SystemZ/SystemZ.h @@ -83,6 +83,10 @@ const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; const unsigned CCMASK_TM = CCMASK_ANY; +const unsigned CCMASK_VTM_SOME_0 = CCMASK_TM_SOME_0 ^ CCMASK_2; +const unsigned CCMASK_VTM_SOME_1 = CCMASK_TM_SOME_1 ^ CCMASK_2; +const unsigned CCMASK_VTM = CCMASK_TM ^ CCMASK_2; + // Condition-code mask assignments for TRANSACTION_BEGIN. const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; @@ -189,6 +193,7 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); +FunctionPass *createSystemZDomainReassignmentPass(SystemZTargetMachine &TM); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); Index: llvm/lib/Target/SystemZ/SystemZDomainReassignment.cpp =================================================================== --- /dev/null +++ llvm/lib/Target/SystemZ/SystemZDomainReassignment.cpp @@ -0,0 +1,3222 @@ +//==- SystemZDomainReassignment.cpp - Selectively switch register classes --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass attempts to find instruction chains (closures) in one domain, +// and convert them to equivalent instructions in a different domain, +// if profitable. +// +// On SystemZ this means converting GPR closures to the vector domain. The +// aim can be to reduce register pressure or to avoid vector element +// extractions. 
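+//
+// For example (an illustrative sketch, not taken from this patch), a GPR
+// computation such as:
+//
+//   %1:gr64bit = LGHI 1
+//   %2:gr64bit = AGR %0, %1
+//
+// could instead be kept in vector registers, roughly as:
+//
+//   %3:vr128bit = IMPLICIT_DEF
+//   %1:vr128bit = VLEIG %3, 1, 0
+//   %2:vr128bit = VAG %0, %1
+//
+// with the value living in doubleword element 0 of a vector register.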
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Printable.h"
+#include <list>
+#include <map>
+#include <set>
+using namespace llvm;
+
+#define SYSTEMZ_DOMAINREASSIGN_NAME "SystemZ Domain Reassignment Pass"
+#define DEBUG_TYPE "systemz-domain-reassignment"
+
+static cl::opt<bool>
+DisableDomReass("disable-domreass", cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+DumpVRLiveness("dump-vregliveness", cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+FullDumpDomainReass("fulldump-domainreass", cl::init(true), cl::Hidden);
+
+// EXPERIMENTAL
+static cl::opt<bool> EnableAll("allconv", cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableVLGVs("vlgvs", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableImmLoads("immloads", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableRegReg("regreg", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableShifts("shifts", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableRegExt("regexts", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableRegMem("regmem", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableRegImm("regimm", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableFPConv("fpconv", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableCompares("compares", cl::init(false), cl::Hidden);
+static cl::opt<bool> EnableLoadAddress("loadaddr", cl::init(false), cl::Hidden);
+
+// Not included with EnableAll:
+static cl::opt<bool> EnableMemExt("memext", cl::init(false), cl::Hidden);
+
+static cl::opt<bool> MultipleUsersCalls("multiple-call-users", cl::init(false));
+
+static cl::opt<bool> DisableExtractions("disable-extractions", cl::init(false));
+
+// Enable VLGV conversions and reassign regardless of register pressure.
+static cl::opt<bool> ForceVLGV("domain-force-vlgv", cl::init(false), cl::Hidden);
+static cl::opt<bool> OnlyWithVLGV("onlywith-vlgv", cl::init(false), cl::Hidden);
+
+static cl::opt<bool> OnlyCmpW0Extr("domain-cmp0-extr", cl::init(false), cl::Hidden);
+
+// Put loaded vector immediates in the preheader if possible.
+// XXX calls: avoid or use fp-lane.
+static cl::opt<bool> HoistImms("domain-hoistimms", cl::init(false), cl::Hidden);
+
+// Option to allow insertion of non-convertible values into vector elements
+// with VLVG.
+static cl::opt<unsigned> MaxScalarInsertions("domain-maxins", cl::init(999),
+                                             cl::Hidden);
+
+// Limits for register pressure estimation.
+static cl::opt<unsigned> GPRLimit("domain-gprlim", cl::init(16), cl::Hidden);
+static cl::opt<unsigned> VecLimit("domain-veclim", cl::init(28), cl::Hidden);
+static cl::opt<unsigned> VecSavedLim("domain-vecsavedlim", cl::init(16), cl::Hidden);
+
+// This enables conversions that are not correctly implemented but were just
+// quickly added to see what the impact would be on the number of closures
+// reassigned. This also means that the compiled output is incorrect.
+static cl::opt<bool> Experimental("domain-experiments", cl::init(false),
+                                  cl::Hidden);
+
+namespace llvm {
+  void initializeSystemZDomainReassignmentPass(PassRegistry&);
+}
+
+namespace {
+
+///// An MRI-based class to track liveness of virtual registers in SSA form.
+// Is this available elsewhere in the tree?
+class VirtRegLiveness {
+public:
+  VirtRegLiveness() {}
+
+  typedef std::map<MachineBasicBlock *, std::set<Register>> MBB2RegsMap;
+  MBB2RegsMap VRegLiveIns;
+  MBB2RegsMap VRegLiveOuts;
+
+  std::map<Register, std::set<const MachineInstr *>> VRegKillMIs;
+  bool regKilledByMI(Register Reg, const MachineInstr *MI) {
+    if (VRegKillMIs.find(Reg) == VRegKillMIs.end())
+      return false;
+    return VRegKillMIs[Reg].count(MI);
+  }
+
+  void compute_and_setkills(const MachineRegisterInfo *MRI,
+                            const MachineDominatorTree *MDT,
+                            MachineFunction *MF);
+
+  void dumpMBB(MachineBasicBlock *MBB);
+  void dumpMF(MachineFunction *MF);
+
+  bool isLoopLiveThroughNotUsed(Register Reg, MachineBasicBlock *MBB,
+                                const MachineLoopInfo *MLI) {
+    assert(VRegLiveIns[MBB].count(Reg) && "Expected Reg to be live into MBB.");
+    const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+    if (MachineLoop *ML = MLI->getLoopFor(MBB)) {
+      for (auto &UseMI : MRI->use_nodbg_instructions(Reg))
+        if (ML->contains(UseMI.getParent()))
+          return false;
+      return true;
+    }
+    return false;
+  }
+};
+
+void VirtRegLiveness::compute_and_setkills(const MachineRegisterInfo *MRI,
+                                           const MachineDominatorTree *MDT,
+                                           MachineFunction *MF) {
+  assert(MRI->isSSA() && "Expected MIR to be in SSA form");
+  VRegLiveIns.clear();
+  VRegLiveOuts.clear();
+  VRegKillMIs.clear();
+
+  typedef std::map<MachineBasicBlock *, MachineInstr *> LastUseMap;
+  std::map<Register, LastUseMap> Reg2LastUses;
+
+  // Find the last user of every register in every MBB.
+  for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
+    Register Reg = Register::index2VirtReg(Idx);
+    if (MRI->getVRegDef(Reg) == nullptr)
+      continue;
+    LastUseMap &LastUses = Reg2LastUses[Reg];
+    for (auto &RegMI : MRI->reg_nodbg_instructions(Reg)) {
+      MachineBasicBlock *UseMBB = RegMI.getParent();
+      if (RegMI.readsRegister(Reg) &&
+          (LastUses.find(UseMBB) == LastUses.end() ||
+           MDT->dominates(LastUses[UseMBB], &RegMI)))
+        LastUses[UseMBB] = &RegMI;
+    }
+  }
+
+  // Find live-ins locally for non-PHI uses.
+  for (auto &II : Reg2LastUses) {
+    Register Reg = II.first;
+    LastUseMap &LastUses = II.second;
+    MachineInstr *DefMI = MRI->getVRegDef(Reg);
+    for (auto &LU : LastUses) {
+      MachineBasicBlock *UseMBB = LU.first;
+      MachineInstr *UseMI = LU.second;
+      if (!UseMI->isPHI() && UseMBB != DefMI->getParent())
+        // A normal use is live-in if not defined in the same block.
+        VRegLiveIns[UseMBB].insert(Reg);
+    }
+  }
+
+  // Handle PHI uses.
+  for (auto &MBB : *MF)
+    for (const MachineInstr &MI : MBB.phis())
+      for (unsigned MOIdx = 1; MOIdx + 1 < MI.getNumOperands(); MOIdx += 2) {
+        Register Reg = MI.getOperand(MOIdx).getReg();
+        MachineBasicBlock *P = MI.getOperand(MOIdx + 1).getMBB();
+        MachineInstr *DefMI = MRI->getVRegDef(Reg);
+        // A PHI use means Reg is live out of and possibly live into P,
+        // however not generally live into MBB.
+        VRegLiveOuts[P].insert(Reg);
+        if (DefMI->getParent() != P)
+          VRegLiveIns[P].insert(Reg);
+      }
+
+  // Propagate VRegLiveIns up the CFG.
+  bool Change = true;
+  while (Change) {
+    Change = false;
+    for (auto &MBB : *MF)
+      for (auto S : MBB.successors())
+        for (Register Reg : VRegLiveIns[S]) {
+          MachineInstr *DefMI = MRI->getVRegDef(Reg);
+          if (DefMI->getParent() != &MBB && VRegLiveIns[&MBB].insert(Reg).second)
+            Change = true;
+        }
+  }
+
+  // Compute VRegLiveOuts for each MBB.
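+  // (In SSA form the live-out set of a block is simply the union of the
+  // live-in sets of its successors, which is what the loop below collects.)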
+ for (auto &MBB : *MF) + for (auto S : MBB.successors()) + for (Register Reg : VRegLiveIns[S]) + VRegLiveOuts[&MBB].insert(Reg); + + // Register kills: Ideally, kill-flags would now be set directly on the + // operands, but this is left undone for now because this causes by itself + // increased spilling during the extra runs of LICM/CSE (there were a lot + // of missing kill-flags at this point). Keep this information on the side + // instead for now in the VRegKillMIs set. + for (auto &II : Reg2LastUses) { + Register Reg = II.first; + LastUseMap &LastUses = II.second; + for (auto &LU : LastUses) { + MachineBasicBlock *UseMBB = LU.first; + MachineInstr *UseMI = LU.second; + if (!UseMI->isPHI() && !VRegLiveOuts[UseMBB].count(Reg)) + // UseMI->addRegisterKilled(Reg, MRI->getTargetRegisterInfo()); + VRegKillMIs[Reg].insert(UseMI); + } + } +} + +static void dumpRegSet(std::string Msg, std::set &Regs) { + dbgs() << Msg << ": "; + for (auto Reg : Regs) + dbgs() << "%" << Register::virtReg2Index(Reg) << ", "; + dbgs() << "\n"; +} + +void VirtRegLiveness::dumpMBB(MachineBasicBlock *MBB) { + dumpRegSet("Live IN vregs", VRegLiveIns[MBB]); + MBB->dump(); + dumpRegSet("Live OUT vregs", VRegLiveOuts[MBB]); + dbgs() << "\n"; +} + +void VirtRegLiveness::dumpMF(MachineFunction *MF) { + dbgs() << "# Machine code for function " << MF->getName() << "\n\n"; + for (auto &MBB : *MF) + dumpMBB(&MBB); +} + +///// End VirtRegLiveness + +#ifndef NDEBUG +// Debug output +static MachineBasicBlock::iterator getPrevOrEnd(MachineInstr *MI) { + return (MI == MI->getParent()->begin()) ? MI->getParent()->end() : + std::prev(MachineBasicBlock::iterator(MI)); +} + +static void dumpConversion(MachineInstr *MI, MachineBasicBlock::iterator Start) { + MachineBasicBlock *MBB = MI->getParent(); + Start = (Start == MBB->end() ? MBB->begin() : std::next(Start)); + dbgs() << "From : "; MI->dump(); + if (Start == MI) + return; // hoisted + dbgs() << " To "; Start->dump(); + while (++Start != MI) { + dbgs() << " "; Start->dump(); + } +} + +void dumpEnclosureMsg(std::string Msg, const MachineInstr *MI) { + if (FullDumpDomainReass) { + dbgs() << "--- " << Msg << ": "; + MI->dump(); + } +} +#else + // A dummy function definition for a non-debug build, to avoid cluttering + // code around users. + static MachineBasicBlock::iterator getPrevOrEnd(MachineInstr *MI) { + return nullptr; + } +#endif + +// Returns true if Reg belongs to the GR64BitRegClass. 
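+// (ADDR64BitRegClass is accepted as well, since address registers are
+// 64-bit GPRs.)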
+static bool is64BitReg(Register Reg, + const MachineRegisterInfo *MRI) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + return RC == &SystemZ::GR64BitRegClass || RC == &SystemZ::ADDR64BitRegClass; +} + +static bool is32BitReg(Register Reg, + const MachineRegisterInfo *MRI) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + return (RC == &SystemZ::GRX32BitRegClass || RC == &SystemZ::GR32BitRegClass || + RC == &SystemZ::GRH32BitRegClass || RC == &SystemZ::ADDR32BitRegClass); +} + +static bool isGPRDomainReg(Register Reg, const MachineRegisterInfo *MRI) { + if (!Register::isVirtualRegister(Reg)) + return false; + return is64BitReg(Reg, MRI) || is32BitReg(Reg, MRI); +} + +static bool isAnyGPR(Register Reg, const MachineRegisterInfo *MRI) { + if (isGPRDomainReg(Reg, MRI)) + return true; + if (!Register::isVirtualRegister(Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + return (RC == &SystemZ::ADDR128BitRegClass || + RC == &SystemZ::GR128BitRegClass); +} + +static bool isVecReg(Register Reg, const MachineRegisterInfo *MRI) { + if (!Register::isVirtualRegister(Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + return (RC == &SystemZ::VR32BitRegClass || RC == &SystemZ::VR64BitRegClass || + RC == &SystemZ::VF128BitRegClass || RC == &SystemZ::VR128BitRegClass || + RC == &SystemZ::FP32BitRegClass || RC == &SystemZ::FP64BitRegClass || + RC == &SystemZ::FP128BitRegClass); +} + +// Return a defined virtual GPR, but only if MI defines only it in operand +// 0, to avoid odd cases. +static Register +getDefedGPRReg(const MachineInstr *MI, const MachineRegisterInfo *MRI) { + Register DefReg = 0; + for (unsigned OpIdx = 0; OpIdx < MI->getNumExplicitOperands(); ++OpIdx) { + auto &Op = MI->getOperand(OpIdx); + if (Op.isReg() && Op.isDef()) { + if (OpIdx != 0 || DefReg) + return SystemZ::NoRegister; + DefReg = Op.getReg(); + } + } + return (DefReg && isGPRDomainReg(DefReg, MRI)) ? DefReg : SystemZ::NoRegister; +} + +/////// Vector lanes handling: + +// Each reassigned virtual register will have a vector lane assigned to +// it. This facilitates things like extensions / truncations where the result +// will end up in a specific lane. For example, truncating a 64 bit value in +// element:64 0 will give the low 32 bits in element:32 1. Before deciding +// that a closure can be reassigned, it is iterated over to find a possible +// assignements of vector lanes. + +// Vector lane identifiers: +// [ G0| G1] +// [ F0| F1| F2| F3] +// [ H H H H H H H H] +// [BBBBBBBBBBBBBBBB] +static unsigned G0 = 1 << 0; +static unsigned G1 = 1 << 1; +static unsigned F0 = 1 << 2; +static unsigned F1 = 1 << 3; +static unsigned F2 = 1 << 4; +static unsigned F3 = 1 << 5; +static unsigned H0 = 1 << 6; +// H1 = 1 << 7; +// H2 = 1 << 8; +// H3 = 1 << 9; +// H4 = 1 << 10; +// H5 = 1 << 11; +// H6 = 1 << 12; +// H7 = 1 << 13; +static unsigned B0 = 1 << 14; +// B1 = 1 << 15; +// B2 = 1 << 16; +// B3 = 1 << 17; +// B4 = 1 << 18; +// B5 = 1 << 19; +// B6 = 1 << 20; +// B7 = 1 << 21; +// B8 = 1 << 22; +// B9 = 1 << 23; +// B10 = 1 << 24; +// B11 = 1 << 25; +// B12 = 1 << 26; +// B13 = 1 << 27; +// B14 = 1 << 28; +// B15 = 1 << 29; + +// Returns the vector lane corresponding to the extracted element of MI. 
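+// For example (illustrative): a VLGVF with a constant element index of 2
+// extracts fullword element 2, which corresponds to lane F2 in the scheme
+// above.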
+static unsigned VLGVElt2Lane(const MachineInstr *MI) { + unsigned FirstLane = 0; + switch(MI->getOpcode()) { + case SystemZ::VLGVG: + FirstLane = countTrailingZeros(G0); + break; + case SystemZ::VLGVF: + FirstLane = countTrailingZeros(F0); + break; + case SystemZ::VLGVH: + FirstLane = countTrailingZeros(H0); + break; + case SystemZ::VLGVB: + FirstLane = countTrailingZeros(B0); + break; + default: + llvm_unreachable("Expected a VLGV opcode"); + break; + } + unsigned ExtractIdx = MI->getOperand(3).getImm(); + return 1 << (FirstLane + ExtractIdx); +} + +// These functions return true if the single lane in Lanes is of a particular +// size. +static bool isDoubleWordLane(unsigned Lanes) { + assert(countPopulation(Lanes) == 1 && "Lane not selected?"); + return Lanes < F0; +} +static bool isFullWordLane(unsigned Lanes) { + assert(countPopulation(Lanes) == 1 && "Lane not selected?"); + return Lanes >= G0 && Lanes < H0; +} +static bool isHalfWordLane(unsigned Lanes) { + assert(countPopulation(Lanes) == 1 && "Lane not selected?"); + return Lanes >= H0 && Lanes < B0; +} +static bool isByteLane(unsigned Lanes) { + assert(countPopulation(Lanes) == 1 && "Lane not selected?"); + return Lanes >= B0; +} + +// This function takes a set SrcLanes and a set DstLanes representing the +// possible lanes of two registers. The two masks show which lanes depend on +// each other: If there are no lanes of SrcMask available in SrcLanes, then +// all of DstMask lanes in DstLanes are unavailable, and vice versa. For +// example: truncating G0 or G1 can only give the result in F1 or F3, but if +// G1 is not available, then F3 can also not be used. +static void applyLaneDeps(unsigned &SrcLanes, unsigned SrcMask, + unsigned &DstLanes, unsigned DstMask) { + if (!(SrcLanes & SrcMask)) + DstLanes &= ~DstMask; + else if (!(DstLanes & DstMask)) + SrcLanes &= ~SrcMask; +} + +// Returns the element index corresponding to a vector lane. +static unsigned lane2EltIdx(unsigned Lanes) { + assert(countPopulation(Lanes) == 1 && "Lane not selected?"); + unsigned LaneIdx = countTrailingZeros(Lanes); + if (isDoubleWordLane(Lanes)) + return LaneIdx - countTrailingZeros(G0); + if (isFullWordLane(Lanes)) + return LaneIdx - countTrailingZeros(F0); + if (isHalfWordLane(Lanes)) + return LaneIdx - countTrailingZeros(H0); + return LaneIdx - countTrailingZeros(B0); +} + +// Checks if the lanes have changed for Reg. If so, Lanes for Reg is updated, +// related instructions are pushed onto the worklist, and true is +// returned. Otherwise returns false. +static bool updateLanes(Register Reg, + unsigned NewLanes, + DenseMap &Lanes, + const MachineRegisterInfo *MRI, + std::list &Worklist, + const MachineInstr *MI) { + if (NewLanes != Lanes[Reg]) { + LLVM_DEBUG(if (FullDumpDomainReass) { + dbgs() << "Visiting "; + MI->dump(); + dbgs() << "Lanes %" + << Register::virtReg2Index(Reg) << ": "; + if (is64BitReg(Reg, MRI)) + dbgs() << ((NewLanes & G0) ? " G0" : " --") + << ((NewLanes & G1) ? " G1" : " --"); + else + dbgs() << ((NewLanes & F0) ? "F0 " : "-- ") + << ((NewLanes & F1) ? "F1 " : "-- ") + << ((NewLanes & F2) ? "F2 " : "-- ") + << ((NewLanes & F3) ? 
"F3 " : "-- "); + if (NewLanes >= H0) { + if (countPopulation(NewLanes) == 1) { + if (isHalfWordLane(NewLanes)) + dbgs() << " H" << lane2EltIdx(NewLanes); + else + dbgs() << " B" << lane2EltIdx(NewLanes); + } + else + dbgs() << " H/B lanes set"; + } + dbgs() << "\n";}); + Lanes[Reg] = NewLanes; + for (auto &RegMI : MRI->reg_nodbg_instructions(Reg)) + if (&RegMI != MI) + Worklist.push_back(&RegMI); + return true; + } + return false; +} + +// Update lanes for two registers. +static bool updateLanes(Register Reg0, + unsigned NewLanes0, + Register Reg1, + unsigned NewLanes1, + DenseMap &Lanes, + const MachineRegisterInfo *MRI, + std::list &Worklist, + const MachineInstr *MI) { + bool Change = updateLanes(Reg0, NewLanes0, Lanes, MRI, Worklist, MI); + Change |= updateLanes(Reg1, NewLanes1, Lanes, MRI, Worklist, MI); + return Change; +} + +// Called when any of the lanes in Lanes is possible to use, in which case +// the first one is taken. +static void selectLane(unsigned &Lanes) { + assert(Lanes && "Cannot select a lane."); + unsigned FirstEltIdx = countTrailingZeros(Lanes); + Lanes = 1 << FirstEltIdx; +} + +// This function selects the first lane available for all explicit operands. +static bool selectLanesGeneric(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + const MachineRegisterInfo *MRI) { + SmallVector MOs; + for (auto &Op : MI->explicit_operands()) + if (Op.isReg() && Lanes.find(Op.getReg()) != Lanes.end()) + MOs.push_back(&Op); + + bool Change = false; + // Pick the first available lane. + for (unsigned I = 0; I < MOs.size(); I++) { + unsigned RegLanes = Lanes[MOs[I]->getReg()]; + selectLane(RegLanes); + Change |= + updateLanes(MOs[I]->getReg(), RegLanes, Lanes, MRI, Worklist, MI); + } + + return Change; +} + +// A generic implementation of findLanes() that finds the intersection of +// possible lanes for all operands. For example, a VAG would require all +// operands to use the same lane, so if one of the source had to be in lane +// G0, the other operands would as well. +static bool findLanesGeneric(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) { + SmallVector MOs; + for (auto &Op : MI->explicit_operands()) + if (Op.isReg() && Lanes.find(Op.getReg()) != Lanes.end()) + MOs.push_back(&Op); + + if (ToFinal) { + bool Change = selectLanesGeneric(MI, Lanes, Worklist, MRI); + for (unsigned I = 0; I < MOs.size(); I++) + assert(Lanes[MOs[I]->getReg()] == Lanes[MOs[0]->getReg()] && + "All operands should use the same lane."); + return Change; + } + + bool Change = false; + if (MOs.size() > 1) { + // Find the intersection of lanes. + unsigned RegLanes = ~0U; + for (unsigned I = 0; I < MOs.size(); I++) + RegLanes &= Lanes[MOs[I]->getReg()]; + // Update the Lanes entry for each operand. + for (unsigned I = 0; I < MOs.size(); I++) + Change |= + updateLanes(MOs[I]->getReg(), RegLanes, Lanes, MRI, Worklist, MI); + } + + return Change; +} + +// MI is a scalar instruction which loads from memory. Load the value into +// the element EltIdx of DstReg, or a new virtual virtual register if not +// provided. Returns DstReg. TODO: Common ImplDefReg beneficial? 
+static Register loadMemIntoVecElt(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, unsigned EltIdx, + Register DstReg = SystemZ::NoRegister) { + unsigned FirstMemOp = 0; + while (MI->getDesc().OpInfo[FirstMemOp].OperandType != MCOI::OPERAND_MEMORY) + FirstMemOp++; + assert(MI->getDesc().OpInfo[FirstMemOp].OperandType == MCOI::OPERAND_MEMORY && + (MI->getDesc().OpInfo[FirstMemOp + 1].OperandType == + MCOI::OPERAND_MEMORY) && + (MI->getDesc().OpInfo[FirstMemOp + 2].OperandType == + MCOI::OPERAND_MEMORY) && + "Expected MI to have three memory operands."); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + Register VTmp0 = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + if (DstReg == SystemZ::NoRegister) + DstReg = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + unsigned VLEOpc = 0; + MachineMemOperand *MMO = *MI->memoperands_begin(); + switch (MMO->getSize()) { + case 8: VLEOpc = SystemZ::VLEG; break; + case 4: VLEOpc = SystemZ::VLEF; break; + case 2: VLEOpc = SystemZ::VLEH; break; + default: break; + } + assert(VLEOpc && "Unexpected number of loaded bytes."); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), VTmp0); + BuildMI(*MBB, MI, DL, TII->get(VLEOpc), DstReg) + .addReg(VTmp0) + .add(MI->getOperand(FirstMemOp)) + .add(MI->getOperand(FirstMemOp + 1)) + .add(MI->getOperand(FirstMemOp + 2)) + .addImm(EltIdx) + .setMemRefs(MI->memoperands()); + return DstReg; +} + +// Types of immediates that are treated differently. +enum ImmediateType { SE16, SE32, SInt32, ZE16, ZE32, ZELH16, UInt32, ANDLow16, NoImmTy }; + +// A class that can load an immediate into a vector element. Different loads +// must use the same IMPLICIT_DEF register in order for MachineCSE to be +// effective later. +class ElementImmLoader { + MachineFunction &MF; + const SystemZInstrInfo *TII; + MachineRegisterInfo *MRI; + const MachineLoopInfo *MLI; + + // EXPERIMENTAL: This is only needed for "domain-cleanups" passes to be + // effecive, since instructions are only identical if all operands are. + Register ImplDefReg; + Register getOrCreateImplDefReg() { + if (ImplDefReg == 0) { + ImplDefReg = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + MachineBasicBlock *Entry = &MF.front(); + BuildMI(*Entry, Entry->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), ImplDefReg); + } + return ImplDefReg; + } + +public: + ElementImmLoader(MachineFunction &MF, const SystemZInstrInfo *TII, + const MachineLoopInfo *MLI) + : MF(MF), TII(TII), MLI(MLI), ImplDefReg(0) { + MRI = &MF.getRegInfo(); + } + + Register loadImmIntoVecElt(MachineInstr *MI, unsigned EltIdx, int64_t Imm, + ImmediateType ImmType, + Register DstReg = SystemZ::NoRegister); +}; + +// Load Imm into the element EltIdx of DstReg, or a new virtual virtual +// register if not provided. Returns DstReg. +// Note: 64 bit immediates with LLIHF + OILF64 not (yet?) supported. +Register ElementImmLoader:: +loadImmIntoVecElt(MachineInstr *MI, unsigned EltIdx, int64_t Imm, + ImmediateType ImmType, Register DstReg) { + assert((isUInt<32>(Imm) || isInt<32>(Imm)) && "Unexpected huge immediate."); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + bool ResultIs64Bit = is64BitReg(MI->getOperand(0).getReg(), MRI); + bool DoSE16 = false; + switch (ImmType) { + case ImmediateType::SE16: + // The significant bits of Imm may or may not have been sign extended. 
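+    // (E.g. the same halfword may arrive here as 0xffff or as -1; the
+    // int16_t cast below canonicalizes it to the sign-extended form.)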
+ assert((isUInt<16>(Imm) || isInt<16>(Imm)) && "Unexpected bits."); + Imm = int16_t(Imm); + DoSE16 = true; + break; + case ImmediateType::SE32: + // The significant bits of Imm may or may not have been sign extended. + assert((isUInt<32>(Imm) || isInt<32>(Imm)) && "Unexpected bits."); + Imm = int32_t(Imm); + if (isInt<16>(Imm)) + DoSE16 = true; + break; + case ImmediateType::SInt32: + // A 32 bit signed integer. + assert(isInt<32>(Imm) && "Unexpected bits."); + assert(!ResultIs64Bit && "A 64 bit element needs extension of immediate."); + if (isInt<16>(Imm)) + DoSE16 = true; + break; + case ImmediateType::ZE16: + // 16 bits that should be zero extended. + assert(isUInt<16>(Imm) && "Unexpected bits."); + LLVM_FALLTHROUGH; + case ImmediateType::ZE32: + // 32 bits that should be zero extended. + assert(isUInt<32>(Imm) && "Unexpected bits."); + if (isUInt<15>(Imm)) + DoSE16 = true; + break; + case ImmediateType::ZELH16: + // 16 high bits of low 32 that should be zero extended to 64 bits. + assert(isUInt<16>(Imm) && "Unexpected bits."); + Imm <<= 16; + break; + case ImmediateType::UInt32: + // A 32 bit unsigned integer. + assert(isUInt<32>(Imm) && "Unexpected bits."); + assert(!ResultIs64Bit && "A 64 bit element needs extension of immediate."); + if ((Imm >> 16 == 0xffff) && (Imm & (1 << 15))) { + Imm = int16_t(Imm & 0xffff); + DoSE16 = true; + } + else if (isUInt<15>(Imm)) + DoSE16 = true; + break; + case ImmediateType::ANDLow16: + // The AND-mask for the 16 low bits which needs all high bits to be set. + assert(isUInt<16>(Imm) && "Unexpected bits."); + Imm = (int64_t(-1) ^ 0xffff) | Imm; + if (isInt<16>(Imm)) + DoSE16 = true; + break; + case NoImmTy: + llvm_unreachable("Can't load an unspecified immediate."); + break; + } + + if (DstReg == SystemZ::NoRegister) + DstReg = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + + MachineBasicBlock *InsMBB = MBB; + MachineBasicBlock::iterator InsPt = MI->getIterator(); + if (HoistImms) + if (MachineLoop *ML = MLI->getLoopFor(MBB)) + if (MachineBasicBlock *PreHeader = ML->getLoopPreheader()) { + InsMBB = PreHeader; + InsPt = PreHeader->getFirstTerminator(); + for (auto &MO : MRI->use_nodbg_operands(DstReg)) + MO.setIsKill(false); + } + + Register ImplDefReg = getOrCreateImplDefReg(); + if (DoSE16) { + // Load Imm into the element with a VLEI, which will sign extend it. + unsigned VLEIOpc = ResultIs64Bit ? SystemZ::VLEIG : SystemZ::VLEIF; + BuildMI(*InsMBB, InsPt, DL, TII->get(VLEIOpc), DstReg) + .addReg(ImplDefReg) + .addImm(Imm) + .addImm(EltIdx); + return DstReg; + } + + int32_t High32 = ((Imm >> 32) & UINT32_MAX); + int16_t Lo16(Imm & UINT16_MAX); + int16_t Hi16((Imm >> 16) & UINT16_MAX); + + bool DoLo16VLEIG = ResultIs64Bit && ((Lo16 & (1 << 15)) == (High32 & 1)); + bool DoHi16VLEIG = (ResultIs64Bit && (Hi16 == High32)); + bool DoHigh32 = ResultIs64Bit && !DoLo16VLEIG && !DoHi16VLEIG; + bool DoLow32VLEIF = DoHigh32 && Hi16 == -1 && Lo16 < 0; + bool DoLo16 = !DoLow32VLEIF && !DoLo16VLEIG; + bool DoHi16 = !DoLow32VLEIF && !DoHi16VLEIG; + + Register VTmp0 = 0; + if (DoLo16VLEIG || DoHi16VLEIG) { + // High32 matches the sign bit of Lo16 or the value of Hi16. + VTmp0 = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLEIG), VTmp0) + .addReg(ImplDefReg) + .addImm(DoLo16VLEIG ? Lo16 : Hi16) + .addImm(EltIdx); + } + else if (DoHigh32) { + // Fill the high 32 bits with ones or zeroes for a 64 bit value. 
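+    // (The 64-bit element EltIdx is addressed as two fullword elements; its
+    // high half is fullword element EltIdx * 2, hence the index used below.)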
+ VTmp0 = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLEIF), VTmp0) + .addReg(ImplDefReg) + .addImm(High32) + .addImm(EltIdx * 2); + } + else + VTmp0 = ImplDefReg; + + if (DoLow32VLEIF) { + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLEIF), DstReg) + .addReg(VTmp0) + .addImm(Lo16) + .addImm(EltIdx * 2 + 1); + } else { + unsigned VLEIH_HighIdx = ResultIs64Bit ? EltIdx * 4 + 2 : EltIdx * 2; + Register VTmp2; + if (DoLo16) { + VTmp2 = + DoHi16 ? MRI->createVirtualRegister(&SystemZ::VR128BitRegClass) : DstReg; + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLEIH), VTmp2) + .addReg(VTmp0) + .addImm(Lo16) + .addImm(VLEIH_HighIdx + 1); + } else + VTmp2 = VTmp0; + + if (DoHi16) + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLEIH), DstReg) + .addReg(VTmp2) + .addImm(Hi16) + .addImm(VLEIH_HighIdx); + } + + return DstReg; +} + +// Return true if MI:OpIdx is used to actually access memory. +// static bool usedForMemAccess(const MachineInstr *MI, unsigned OpIdx) { +// if (!MI->mayLoad() && !MI->mayStore()) +// return false; +// +// Return true if MI:OpIdx is used in an address (and should not be converted). +static bool usedInAddress(const MachineInstr *MI, unsigned OpIdx) { + //EXPERIMENTAL: + if (MI->getOpcode() == SystemZ::LA || MI->getOpcode() == SystemZ::LAY) + return false; + if (MI->isCopy() || MI->isPHI() || MI->isInlineAsm()) + return false; + assert(OpIdx < MI->getDesc().getNumOperands() && "bad operand index."); + return MI->getDesc().OpInfo[OpIdx].OperandType == MCOI::OPERAND_MEMORY; +} + +/////// Instruction converters +// A converter is defined for each scalar opcode that can be converted into +// the vector domain + +enum Reason { IllegalInsertion, Address, Offset, SubReg, PhysRegIN, PhysRegOUT, + VLGVVarElt, VLGVSearch, VLGVExt, ImplicitReg, + Cmp0NoExtract, CCUsers, CCUserMask, CmpImmediate, HasCalls, + HasCallsInner, HasCallsDefBeforeLoop, HasCallsLanes, + HasCallsPHIs, HasCallsUsers, InsertionOnly, + NumScalarInsertions, EndsWithInt2FP, Lanes, Extraction, + InsertVLGV, NoVLGV}; + +static int const VLGVReassignCost = -4; + +/// Abstract instruction converter base class. +class InstrConverterBase { +protected: + unsigned SrcOpcode; + +public: + InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} + + virtual ~InstrConverterBase() {} + + /// \returns true if \p MI is legal to convert. + virtual bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const { + assert(MI->getOpcode() == SrcOpcode && + "Wrong instruction passed to converter"); + + // Memory: Rejecting all instructions with >12 bit displacements. Using a + // GPR to add a bigger offset would defeat the purpose of helping + // register pressure (that would only possibly be beneficial if the + // address register is killed and therefore could be used for this). + if (MI->mayLoad() || MI->mayStore()) { + assert(MI->hasOneMemOperand() && "Missing memory operand?"); + unsigned NumOps = MI->getNumExplicitOperands(); + int Displ = MI->getOperand(NumOps - 2).getImm(); + if (!isUInt<12>(Displ)) { + LLVM_DEBUG(dumpEnclosureMsg("offset ", MI);); + Reasons.insert(Reason::Offset); + return false; + } + } + + // Only deal with subregs in COPYs from GR64 to GR32. 
+ for (auto &Op : MI->explicit_uses()) + if (Op.isReg() && Op.getSubReg() && + (!MI->isCopy() || + (Op.getSubReg() != SystemZ::subreg_l32 || + MRI->getRegClass(Op.getReg()) != &SystemZ::GR64BitRegClass))) { + LLVM_DEBUG(dumpEnclosureMsg("subreg ", MI);); + Reasons.insert(Reason::SubReg); + return false; + } + + return true; + } + + /// In first iteration (ToFinal == false), the set of possible vector lanes + /// is found for each operand. If ToFinal is true, a single lane for each + /// operand is selected. \returns true if anything changed. + virtual bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const { + return findLanesGeneric(MI, Lanes, Worklist, ToFinal, MRI); + } + + /// Applies conversion to \p MI. + /// + /// \returns true if \p MI is no longer need, and can be deleted. + virtual bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) = 0; + + /// \returns the cost increment incurred by converting \p MI. + virtual int getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const { return 0; } + + virtual bool isInt2FP() const { return false; } +}; + +/// An Instruction Converter for pseudos like PHI instructions which are not +/// changed. +class PseudoConverter : public InstrConverterBase { +public: + PseudoConverter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + return false; // Don't erase + } +}; + +// If MI is a COPY of the low 32 bits of a VLGV instruction return the VLGV, +// otherwise nullptr. +static const MachineInstr *getVLGVDefMIFromCopyLow32(const MachineInstr *MI, + const MachineRegisterInfo *MRI) { + if (!MI->isCopy()) + return nullptr; + const MachineOperand &SrcMO = MI->getOperand(1); + if (SrcMO.getSubReg() != SystemZ::subreg_l32 || + !MRI->hasOneDef(SrcMO.getReg())) + return nullptr; + MachineInstr *DefMI = MRI->getVRegDef(SrcMO.getReg()); + if (DefMI->getOpcode() == SystemZ::VLGVF || + DefMI->getOpcode() == SystemZ::VLGVH || + DefMI->getOpcode() == SystemZ::VLGVB) + return DefMI; + return nullptr; +} + +// If Reg was defined by or copied from a VLGV instruction, return the VLGV, +// otherwise nullptr. +static const MachineInstr* +getVLGVDefMIFromReg(Register Reg, const MachineRegisterInfo *MRI) { + if (!MRI->hasOneDef(Reg)) + return nullptr; + MachineInstr *MI = MRI->getVRegDef(Reg); + if (MI->getOpcode() == SystemZ::VLGVG) + return MI; + while (MI->isCopy()) { + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); + if (Register::isPhysicalRegister(SrcReg)) + return nullptr; + if (!is32BitReg(DstReg, MRI) || !is32BitReg(SrcReg, MRI) || + !MRI->hasOneDef(SrcReg)) + break; + MI = MRI->getVRegDef(SrcReg); + } + return getVLGVDefMIFromCopyLow32(MI, MRI); +} + +/// An instruction converter for replacing COPY instructions. 
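+/// For example (illustrative): '%lo:gr32bit = COPY %x.subreg_l32' keeps the
+/// value in the same vector register; if %x was assigned lane G0 the result
+/// uses lane F1, and if it was assigned G1 the result uses lane F3 (the low
+/// fullword of the respective doubleword).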
+class COPYConverter : public InstrConverterBase { +public: + + COPYConverter() : InstrConverterBase(SystemZ::COPY) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrConverterBase::isLegal(MI, TII, MRI, Reasons)) + return false; + + const MachineOperand &DstMO = MI->getOperand(0); + const MachineOperand &SrcMO = MI->getOperand(1); + if (Register::isPhysicalRegister(DstMO.getReg()) || + Register::isPhysicalRegister(SrcMO.getReg())) { + // Don't convert a COPY involving a phys-reg. + LLVM_DEBUG(dumpEnclosureMsg("physreg ", MI);); + Reasons.insert(Register::isPhysicalRegister(DstMO.getReg()) ? + Reason::PhysRegOUT : Reason::PhysRegIN); + return false; + } + + return true; + } + + virtual bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + const MachineOperand &DstMO = MI->getOperand(0); + const MachineOperand &SrcMO = MI->getOperand(1); + if (!SrcMO.getSubReg()) + return InstrConverterBase::findLanes(MI, Lanes, Worklist, ToFinal, MRI); + + // VLGVF/VGLVH/VLGVB cases. + if (const MachineInstr *VLGV_MI = getVLGVDefMIFromCopyLow32(MI, MRI)) { + // The COPY source reg will be replaced by the vector (VLGV) source reg + // and will/can not have any lane assigned to it. Find the lane for the + // extracted element and assign it to DstLanes. + unsigned DstLanes = VLGVElt2Lane(VLGV_MI); + return updateLanes(DstMO.getReg(), DstLanes, Lanes, MRI, Worklist, MI); + } + + // General case of copying low32 subreg. + if (ToFinal) { + bool Change = selectLanesGeneric(MI, Lanes, Worklist, MRI); + assert(((Lanes[SrcMO.getReg()] == G0 && Lanes[DstMO.getReg()] == F1) || + (Lanes[SrcMO.getReg()] == G1 && Lanes[DstMO.getReg()] == F3)) && + "Bad lanes for COPY of subreg_l32."); + return Change; + } + + unsigned DstLanes = Lanes[DstMO.getReg()] & (F1 | F3); + unsigned SrcLanes = Lanes[SrcMO.getReg()]; + applyLaneDeps(SrcLanes, G0, DstLanes, F1); + applyLaneDeps(SrcLanes, G1, DstLanes, F3); + return updateLanes(DstMO.getReg(), DstLanes, SrcMO.getReg(), SrcLanes, + Lanes,MRI, Worklist, MI); + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineOperand &SrcMO = MI->getOperand(1); + + // VLGVF/VGLVH/VLGVB cases. + if (const MachineInstr *VLGV_MI = getVLGVDefMIFromCopyLow32(MI, MRI)) { + const MachineOperand &VecMO = VLGV_MI->getOperand(1); + SrcMO.setReg(VecMO.getReg()); + SrcMO.setIsKill(VecMO.isKill()); + } + + if (SrcMO.getSubReg()) { + // Remove subreg of COPY source and rely on using the right vector + // element (lane) in users. + LLVM_DEBUG(dbgs() << "From : "; MI->dump();); + SrcMO.setSubReg(0); + LLVM_DEBUG(dbgs() << " To "; MI->dump();); + } + + // Don't erase the COPY. + return false; + } +}; + +/// An instruction converter for element extractions (VLGV). The 32/16/8 bit +/// cases COPY the low 32 bits out of the defined 64 bits and therefore their +/// conversions are handled in part by the COPYConverter. +class VLGVConverter : public InstrConverterBase { + + // Returns true if the UserMI is only using the extracted bits of the DefMI + // element, and not the full zero extended value. 
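+  // For instance, a TMLMux user whose mask fits in the extracted byte (for
+  // VLGVB) only depends on the extracted bits, whereas a user of the full
+  // 32-bit value would also depend on the zero extension.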
+ bool isUsingOnlyExtractedBits(const MachineInstr *UserMI, + const MachineInstr *DefMI, + const MachineRegisterInfo *MRI) const { + assert((DefMI->getOpcode() == SystemZ::VLGVH || + DefMI->getOpcode() == SystemZ::VLGVB) && "Bad DefMI opcode."); + if (UserMI->isCopy()) { + Register CopyDefReg = UserMI->getOperand(0).getReg(); + for (auto &CopyUseMI : MRI->use_nodbg_instructions(CopyDefReg)) + if (!isUsingOnlyExtractedBits(&CopyUseMI, DefMI, MRI)) + return false; + return true; + } + else if (UserMI->getOpcode() == SystemZ::TMLMux) { + assert(getVLGVDefMIFromReg(UserMI->getOperand(0).getReg(), MRI) == DefMI && + "Could not trace back to DefMI?"); + if (DefMI->getOpcode() == SystemZ::VLGVB) + return isUInt<8>(UserMI->getOperand(1).getImm()); + return true; + } + + return false; + } + +public: + VLGVConverter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrConverterBase::isLegal(MI, TII, MRI, Reasons)) + return false; + + // Can only deal with a constant element index. + if (MI->getOperand(2).getReg()) { + LLVM_DEBUG(dumpEnclosureMsg("variable elt", MI);); + Reasons.insert(Reason::VLGVVarElt); + return false; + } + + Register DefReg = MI->getOperand(0).getReg(); + if (MI->getOpcode() != SystemZ::VLGVG) { + // All users should be a COPY of the low32 subreg, and all those COPYs + // must be able to find their way to MI as well. + for (auto &UseMI : MRI->use_nodbg_instructions(DefReg)) + if (getVLGVDefMIFromCopyLow32(&UseMI, MRI) != MI) { + LLVM_DEBUG(dumpEnclosureMsg("context ", MI);); + Reasons.insert(Reason::VLGVSearch); + return false; + } + } + + if (MI->getOpcode() == SystemZ::VLGVH || MI->getOpcode() == SystemZ::VLGVB) { + // Since extracting a halfword/byte element zero extends it to 32 bits, + // using that subelement without extraction can only be done directly + // when the extension is not needed. + for (auto &UseMI : MRI->use_nodbg_instructions(DefReg)) + if (!isUsingOnlyExtractedBits(&UseMI, MI, MRI)) { + LLVM_DEBUG(dumpEnclosureMsg("context ", MI);); + Reasons.insert(Reason::VLGVExt); + return false; + } + } + + return true; + } + + virtual bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + if (MI->getOpcode() == SystemZ::VLGVG) { + Register DstReg = MI->getOperand(0).getReg(); + unsigned DstLanes = Lanes[DstReg] & VLGVElt2Lane(MI); + return updateLanes(DstReg, DstLanes, Lanes, MRI, Worklist, MI); + } + // VLGVF/VLGVH/VLGVB: These define a gr64bit reg which have the + // subreg_l32 COPY:ed from it. The dst-reg of that COPY will get the lane + // of the extracted element directly. + return false; + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + if (MI->getOpcode() == SystemZ::VLGVG) { + // Replace with a COPY. + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)); + LLVM_DEBUG(dumpConversion(MI, Start)); + } else { + // VLGVF/VLGVH/VLGVB: The COPY source operand will be reset to the VLGV + // source, so just remove this. 
+ LLVM_DEBUG(dbgs() << "From : "; MI->dump();); + LLVM_DEBUG(dbgs() << " To (removed)\n";); + } + return true; + } + + int getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + LLVM_DEBUG(dumpEnclosureMsg("Eliminating extraction: \t", MI)); + return VLGVReassignCost; + } +}; + +// A converter for loads of various immediates. +class ImmLoadConverter : public InstrConverterBase { + ImmediateType ImmType; + ElementImmLoader &ImmLoader; + +public: + ImmLoadConverter(unsigned SrcOpcode, ImmediateType ImmT, + ElementImmLoader &ImmLoader) + : InstrConverterBase(SrcOpcode), ImmType(ImmT), ImmLoader(ImmLoader) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + ImmLoader.loadImmIntoVecElt(MI, + lane2EltIdx(Lanes[MI->getOperand(0).getReg()]), + MI->getOperand(1).getImm(), ImmType, + MI->getOperand(0).getReg()); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + // TODO: Return a cost based on number of instructions needed. +}; + +/// An instruction converter which replaces an instruction with another. +class InstrReplacer : public InstrConverterBase { +protected: + // If the vector lanes call for it, the "low" or "odd" opcode needs to be + // used. For example, unpacking F0 to G0 needs a VUPHF, while unpacking F2 + // to G0 would require VUPLF. In such a case DstOpcodeAlt is set for that + // particular MI during conversion. + unsigned DstOpcodeAlt; + +private: + // If a derived converter has set DstOpcodeAlt, return it this time and + // clear it. Otherwise return the regular DstOpcode. + unsigned getDstOpcodeToUse() { + unsigned Opc = DstOpcode; + if (DstOpcodeAlt) { + Opc = DstOpcodeAlt; + DstOpcodeAlt = 0; + } + return Opc; + } + +public: + /// Opcode of the destination instruction. + unsigned DstOpcode; + + InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrConverterBase(SrcOpcode), DstOpcodeAlt(0), DstOpcode(DstOpcode) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrConverterBase::isLegal(MI, TII, MRI, Reasons)) + return false; + + // It's illegal to replace an instruction that implicitly defines a register + // with an instruction that doesn't unless that register def is dead. + for (auto &MO : MI->implicit_operands()) + if (MO.isReg() && MO.isDef() && !MO.isDead() && + !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg())) { + LLVM_DEBUG(dumpEnclosureMsg("implicit reg", MI);); + Reasons.insert(Reason::ImplicitReg); + return false; + } + + // DstOpcode should not implictly define any register if MI doesn't. + const MCInstrDesc &DstMCID = TII->get(DstOpcode); + if (DstMCID.getNumImplicitDefs() > 0) + for (const MCPhysReg *Regs = DstMCID.getImplicitDefs(); *Regs; ++Regs) + if (!MI->definesRegister(*Regs)) { + LLVM_DEBUG(dumpEnclosureMsg("implicit reg", MI);); + Reasons.insert(Reason::ImplicitReg); + return false; + } + + return true; + } + +protected: + MachineInstr *replaceMI(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI) { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Opc = getDstOpcodeToUse(); + MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(Opc)); + + // Transfer explicit operands from original instruction. 
+ for (auto &Op : MI->explicit_operands()) + Bld.add(Op); + + if (MI->hasOneMemOperand()) + Bld.setMemRefs(MI->memoperands()); + + return Bld; + } + +public: + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + replaceMI(MI, TII, MRI); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } +}; + +// A converter to replace a shift instruction where the immediate shift +// amount needs to be converted. +class ShiftReplacer : public InstrReplacer { +public: + ShiftReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + MachineInstr *BuiltMI = InstrReplacer::replaceMI(MI, TII, MRI); + MachineOperand &ImmMO = BuiltMI->getOperand(3); + ImmMO.setImm(ImmMO.getImm() & 0xfff); // XXX modulo interpretation + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } +}; + +// A converter to replace a scalar load with a load into a vector element. +class LoadReplacer : public InstrReplacer { +public: + LoadReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + loadMemIntoVecElt(MI, TII, MRI, + lane2EltIdx(Lanes[MI->getOperand(0).getReg()]), + MI->getOperand(0).getReg()); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } +}; + +// A converter to replace a scalar store with a store of a vector element. +class StoreReplacer : public InstrReplacer { +public: + StoreReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + Register SavedReg = MI->getOperand(0).getReg(); + unsigned EltIdx = lane2EltIdx(Lanes[SavedReg]); + MachineMemOperand *MMO = *MI->memoperands_begin(); + unsigned SavedBytes = MMO->getSize(); + if (SavedBytes < 4) { + assert((is32BitReg(SavedReg, MRI) || + MRI->getVRegDef(SavedReg)->getOpcode() == SystemZ::VLVGF) + && "Expected truncating store from 32 bit register only."); + unsigned SubElts = 4 / SavedBytes; + EltIdx = ((EltIdx + 1) * SubElts) - 1; + } + + BuildMI(*MBB, MI, DL, TII->get(DstOpcode)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .add(MI->getOperand(2)) + .add(MI->getOperand(3)) + .addImm(EltIdx) + .setMemRefs(MI->memoperands()); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } +}; + +// A converter to replace a scalar sign-extending load with a load into a +// vector element followed by an unpack. +class SExtLoadReplacer : public InstrReplacer { +public: + SExtLoadReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + // Load into the same element index as that of the result and then unpack. + // TODO: sext of a loaded imm could do just vleig (subregliveness-04.ll). 
+ MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + Register VTmp1 = loadMemIntoVecElt(MI, TII, MRI, + lane2EltIdx(Lanes[MI->getOperand(0).getReg()])); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(DstOpcode)) + .add(MI->getOperand(0)) + .addReg(VTmp1); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + int getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + LLVM_DEBUG(dumpEnclosureMsg("Extra cost for unpack : \t", MI)); + return 4; + } +}; + +// A converter to replace a scalar zero-extending load with a VLLEZ. +class ZExtLoadReplacer : public InstrReplacer { +public: + ZExtLoadReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + // A VLLEZx always loads into (the rightmost subelement of) G0. + Register DstReg = MI->getOperand(0).getReg(); + unsigned DstLanes = Lanes[DstReg] & (is64BitReg(DstReg, MRI) ? G0 : F1); + return updateLanes(DstReg, DstLanes, Lanes, MRI, Worklist, MI); + } +}; + +// A converter to replace a scalar register extension from 32 to 64 or 16 to +// 32 bits. +class RegExtReplacer : public InstrReplacer { + unsigned LowOpcode; + +public: + RegExtReplacer(unsigned SrcOpcode, unsigned DstOpcode, unsigned LOpc) + : InstrReplacer(SrcOpcode, DstOpcode), LowOpcode(LOpc) {} + + bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); + unsigned DstLanes = Lanes[DstReg]; + unsigned SrcLanes = Lanes[SrcReg]; + // 32 to 64 bits: F0/F2 -> G0 F1/F3 -> G1 + // 16 to 32 bits: F0/F2 -> F1 F1/F3 -> F3 + bool ResultIs64Bit = is64BitReg(DstReg, MRI); + unsigned ResLane0 = ResultIs64Bit ? G0 : F1; + unsigned ResLane1 = ResultIs64Bit ? G1 : F3; + + if (ToFinal) { + selectLane(DstLanes); + // Make sure to select source idx 0 or 2 for dst idx 0. + if (DstLanes & ResLane0) + SrcLanes &= (F0 | F2); + selectLane(SrcLanes); + } + else { + DstLanes &= (ResLane0 | ResLane1); + applyLaneDeps(SrcLanes, (F0 | F2), DstLanes, ResLane0); + applyLaneDeps(SrcLanes, (F1 | F3), DstLanes, ResLane1); + } + assert((!ToFinal || + ((DstLanes == ResLane0 && (SrcLanes == F0 || SrcLanes == F2)) || + (DstLanes == ResLane1 && (SrcLanes == F1 || SrcLanes == F3)))) + && "Bad lanes for register extension."); + return updateLanes(DstReg, DstLanes, SrcReg, SrcLanes, + Lanes, MRI, Worklist, MI); + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + // Use unpack "low" opcode instead if called for. + if (lane2EltIdx(Lanes[MI->getOperand(1).getReg()]) >= 2) + DstOpcodeAlt = LowOpcode; + return InstrReplacer::convertInstr(MI, TII, MRI, Lanes); + } +}; + +// A converter for a reg/mem instruction. The memory operand is first loaded +// into a vector element. 
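+// For example (sketch): a fullword reg/mem operation would have its memory
+// operand loaded with a VLEF into a temporary vector register, followed by
+// the corresponding vector operation (e.g. VAF) on the assigned lane.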
+class RegMemReplacer : public InstrReplacer { +public: + RegMemReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + Register VTmp1 = loadMemIntoVecElt(MI, TII, MRI, + lane2EltIdx(Lanes[MI->getOperand(1).getReg()])); + BuildMI(*MBB, MI, DL, TII->get(DstOpcode)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addReg(VTmp1); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + int getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + LLVM_DEBUG(dumpEnclosureMsg("Extra cost for VLE : \t", MI)); + return 1; + } +}; + +// A converter for a reg/imm instruction. The immediate operand is first loaded +// into a vector element. +class RegImmReplacer : public InstrReplacer { + ImmediateType ImmType; + ElementImmLoader &ImmLoader; + +public: + RegImmReplacer(unsigned SrcOpcode, unsigned DstOpcode, ImmediateType ImmT, + ElementImmLoader &ImmLoader) + : InstrReplacer(SrcOpcode, DstOpcode), ImmType(ImmT), ImmLoader(ImmLoader) {} + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + Register VTmp1 = ImmLoader.loadImmIntoVecElt(MI, + lane2EltIdx(Lanes[MI->getOperand(1).getReg()]), + MI->getOperand(2).getImm(), ImmType); + BuildMI(*MBB, MI, DL, TII->get(DstOpcode)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addReg(VTmp1); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + int getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + LLVM_DEBUG(dumpEnclosureMsg("Extra cost, imm load : \t", MI)); + return 1; + } +}; + +// A converter for a multiply with reg/mem/imm instructions. A +// memory/immediate operand is first loaded into a vector element. +class MulReplacer : public InstrReplacer { + ImmediateType ImmType; + ElementImmLoader &ImmLoader; + +public: + MulReplacer(unsigned SrcOpcode, unsigned DstOpcode, ImmediateType ImmT, + ElementImmLoader &ImmLoader) + : InstrReplacer(SrcOpcode, DstOpcode), ImmType(ImmT), ImmLoader(ImmLoader) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrConverterBase::isLegal(MI, TII, MRI, Reasons)) + return false; + + return true; + } + + bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + Register DstReg = MI->getOperand(0).getReg(); + Register Src1Reg = MI->getOperand(1).getReg(); + Register Src2Reg = 0; + const MachineOperand &Src2MO = MI->getOperand(2); + if (Src2MO.isReg() && Lanes.find(Src2MO.getReg()) != Lanes.end()) + Src2Reg = Src2MO.getReg(); // XXX ADDR64? 
+ + if (ToFinal) { + bool Change = selectLanesGeneric(MI, Lanes, Worklist, MRI); + assert(((((Lanes[Src1Reg] == F0 || Lanes[Src1Reg] == F1) && + Lanes[DstReg] == F1) || + ((Lanes[Src1Reg] == F2 || Lanes[Src1Reg] == F3) && + Lanes[DstReg] == F3)) && + (!Src2Reg || Lanes[Src1Reg] == Lanes[Src2Reg])) && + "Bad vector lanes for VMEF/VMOF"); + return Change; + } + + // VMEF (Vector Multiply Even Fullword) works on the even indexed + // elements. The result has double width, so its lane is the odd lane of + // the double sized element. VMOF works on the odd elements instead. + unsigned DstLanes = (F1 | F3) & Lanes[DstReg]; + unsigned SrcLanes = Lanes[Src1Reg]; + if (Src2Reg) + SrcLanes &= Lanes[Src2Reg]; + applyLaneDeps(SrcLanes, (F0 | F1), DstLanes, F1); + applyLaneDeps(SrcLanes, (F2 | F3), DstLanes, F3); + bool Change = updateLanes(DstReg, DstLanes, Src1Reg, SrcLanes, + Lanes, MRI, Worklist, MI); + if (Src2Reg) + Change |= updateLanes(Src2Reg, SrcLanes, Lanes, MRI, Worklist, MI); + return Change; + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + Register RHSReg = SystemZ::NoRegister; + MachineOperand &Src2MO = MI->getOperand(2); + if (Src2MO.isImm()) + RHSReg = ImmLoader.loadImmIntoVecElt(MI, + lane2EltIdx(Lanes[MI->getOperand(1).getReg()]), + MI->getOperand(2).getImm(), ImmType); + else if (MI->getDesc().OpInfo[2].OperandType == MCOI::OPERAND_MEMORY) + RHSReg = loadMemIntoVecElt(MI, TII, MRI, + lane2EltIdx(Lanes[MI->getOperand(1).getReg()])); + unsigned Opc = DstOpcode; + // Use "odd" opcode instead if called for. + if (lane2EltIdx(Lanes[MI->getOperand(1).getReg()]) % 2 != 0) + Opc = SystemZ::VMOF; + MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(Opc)); + Bld.add(MI->getOperand(0)); + Bld.add(MI->getOperand(1)); + if (RHSReg) + Bld.addReg(RHSReg); + else + Bld.add(Src2MO); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } +}; + +// A converter for fp<->int conversions. +class FPIntConvReplacer : public InstrReplacer { +public: + FPIntConvReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode) {} + + bool isInt2FP() const override { + return (DstOpcode == SystemZ::WCDGB || DstOpcode == SystemZ::WCEFB || + DstOpcode == SystemZ::WCDLGB || DstOpcode == SystemZ::WCELFB); + } + + bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + // The scalar FP conversion instructions correspond to the first lane. + unsigned SrcRegOpNo = MI->getOperand(1).isReg() ? 1 : 2; + Register Reg = MI->getOperand(isInt2FP() ? SrcRegOpNo : 0).getReg(); + unsigned RegLanes = Lanes[Reg] & (is64BitReg(Reg, MRI) ? G0 : F0); + return updateLanes(Reg, RegLanes, Lanes, MRI, Worklist, MI); + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + const TargetRegisterClass *RC = + TRI->getRegClass(TII->get(DstOpcode).OpInfo[0].RegClass); + unsigned SubRegIdx = (RC == &SystemZ::VR64BitRegClass ? 
SystemZ::subreg_h64 + : SystemZ::subreg_h32); + Register VTmp0 = MRI->createVirtualRegister(RC); + Register VTmp1 = MRI->createVirtualRegister(RC); + unsigned M4 = 0; // XxC + + if (isInt2FP()) { + unsigned M5 = 0; // Rounding method + unsigned SrcRegOpNo = 1; + if (DstOpcode == SystemZ::WCDLGB || DstOpcode == SystemZ::WCELFB) { + M4 = MI->getOperand(3).getImm(); + M5 = MI->getOperand(1).getImm(); + SrcRegOpNo = 2; + } + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), VTmp0) + .addReg(MI->getOperand(SrcRegOpNo).getReg(), 0, SubRegIdx); + BuildMI(*MBB, MI, DL, TII->get(DstOpcode), VTmp1) + .addReg(VTmp0).addImm(M4).addImm(M5); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY)) + .add(MI->getOperand(0)) + .addReg(VTmp1); + } + else { + Register VTmp2 = MRI->createVirtualRegister(RC); + unsigned M5 = MI->getOperand(1).getImm(); // Rounding method + if (DstOpcode == SystemZ::WCLGDB || DstOpcode == SystemZ::WCLFEB) + M4 = MI->getOperand(3).getImm(); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), VTmp0) + .add(MI->getOperand(2)); + BuildMI(*MBB, MI, DL, TII->get(DstOpcode), VTmp1) + .addReg(VTmp0).addImm(M4).addImm(M5); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), VTmp2); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG)) + .add(MI->getOperand(0)) + .addReg(VTmp2).addReg(VTmp1).addImm(SubRegIdx); + } + + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } +}; + +// A converter for comparisons. +class CompareReplacer : public InstrReplacer { + ImmediateType ImmType; + ElementImmLoader &ImmLoader; + + bool isCompareImm() const { + switch(SrcOpcode) { + case SystemZ::CHIMux: + case SystemZ::CGHI: + return true; + default: break; + } + llvm_unreachable("Unhandled opcode."); + return false; + } + + bool isCompareMem(const MachineInstr *MI) const { + return MI->getDesc().OpInfo[1].OperandType == MCOI::OPERAND_MEMORY; + } + +public: + CompareReplacer(unsigned SrcOpcode, unsigned DstOpcode, ImmediateType ImmT, + ElementImmLoader &ImmLoader) + : InstrReplacer(SrcOpcode, DstOpcode), ImmType(ImmT), ImmLoader(ImmLoader) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrReplacer::isLegal(MI, TII, MRI, Reasons)) + return false; + + const MachineOperand &LHS = MI->getOperand(0); + const MachineOperand &RHS = MI->getOperand(1); + if (OnlyCmpW0Extr && RHS.isImm() && RHS.getImm() == 0 && + getVLGVDefMIFromReg(LHS.getReg(), MRI) == nullptr) { + // Only do a compare w/0 in cases that eliminates an element + // extraction. This is generally optimized in GPRs (by + // SystemZElimCompare), and it is potentially on the critical path of + // the function. + LLVM_DEBUG(dumpEnclosureMsg("not extract ", MI);); + Reasons.insert(Reason::Cmp0NoExtract); + return false; + } + + return true; + } + + bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + // A VEC only works with the rightmost element of the high half (G0, F1, + // ...) + Register LHSReg = MI->getOperand(0).getReg(); + unsigned LHSLanes = Lanes[LHSReg] & (is64BitReg(LHSReg, MRI) ? G0 : F1); + bool Change = updateLanes(LHSReg, LHSLanes, Lanes, MRI, Worklist, MI); + if (!isCompareMem(MI) && MI->getOperand(1).isReg()) { + Register RHSReg = MI->getOperand(1).getReg(); + unsigned RHSLanes = Lanes[RHSReg] & (is64BitReg(RHSReg, MRI) ? 
G0 : F1); + Change |= updateLanes(RHSReg, RHSLanes, Lanes, MRI, Worklist, MI); + } + return Change; + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + Register LHSReg = MI->getOperand(0).getReg(); + Register RHSReg = SystemZ::NoRegister; + MachineOperand &RHSMO = MI->getOperand(1); + if (RHSMO.isImm()) + RHSReg = ImmLoader.loadImmIntoVecElt(MI, lane2EltIdx(Lanes[LHSReg]), + RHSMO.getImm(), ImmType); + else if(isCompareMem(MI)) + RHSReg = loadMemIntoVecElt(MI, TII, MRI, lane2EltIdx(Lanes[LHSReg])); + + MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode)); + Bld.add(MI->getOperand(0)); + if (RHSReg) + Bld.addReg(RHSReg); + else + Bld.add(RHSMO); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + int getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + if (MI->getOperand(1).isImm()) { + LLVM_DEBUG(dumpEnclosureMsg("Loading compare imm : \t", MI)); + return 1; + } + if (isCompareMem(MI)) { + LLVM_DEBUG(dumpEnclosureMsg("Loading compare mem : \t", MI)); + return 1; + } + return 0; + } +}; + +class LoadAddressReplacer : public InstrConverterBase { + ElementImmLoader &ImmLoader; + +public: + LoadAddressReplacer(unsigned SrcOpcode, ElementImmLoader &ImmLoader) : + InstrConverterBase(SrcOpcode), ImmLoader(ImmLoader) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrConverterBase::isLegal(MI, TII, MRI, Reasons)) + return false; + return MI->getOperand(1).isReg(); // Don't convert a FrameIndex + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + MachineOperand BaseMO = MI->getOperand(1); + int64_t Displ = MI->getOperand(2).getImm(); + MachineOperand IdxMO = MI->getOperand(3); + assert(BaseMO.getReg() && (IdxMO.getReg() || Displ) && + "Noop Load Address?"); + + Register LoadedImmReg = !Displ ? 
SystemZ::NoRegister + : ImmLoader.loadImmIntoVecElt(MI, + lane2EltIdx(Lanes[MI->getOperand(0).getReg()]), + Displ, ImmediateType::SE32); + + Register TmpReg = SystemZ::NoRegister; + if (Displ && IdxMO.getReg()) { + TmpReg = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::VAG), TmpReg) + .addReg(LoadedImmReg) + .add(IdxMO); + } + else if (Displ) + TmpReg = LoadedImmReg; + else + TmpReg = IdxMO.getReg(); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::VAG)) + .add(MI->getOperand(0)) + .add(BaseMO) + .addReg(TmpReg); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + // TODO: getExtraCost() +}; + +class ExperimentalReplacer : public InstrConverterBase { + ElementImmLoader &ImmLoader; + +public: + ExperimentalReplacer(unsigned SrcOpcode, ElementImmLoader &ImmLoader) : + InstrConverterBase(SrcOpcode), ImmLoader(ImmLoader) {} + + bool isLegal(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) const override { + if (!InstrConverterBase::isLegal(MI, TII, MRI, Reasons)) + return false; + if (MI->getOpcode() == SystemZ::INSERT_SUBREG) { + if (MRI->getRegClass(MI->getOperand(0).getReg()) == + &SystemZ::GR128BitRegClass) + return false; + if (MI->getOperand(3).getImm() != SystemZ::subreg_l32) + return false; + if (MRI->getVRegDef(MI->getOperand(1).getReg())->getOpcode() != + SystemZ::IMPLICIT_DEF) + return false; + } + return true; + } + + virtual bool findLanes(const MachineInstr *MI, + DenseMap &Lanes, + std::list &Worklist, + bool ToFinal, + const MachineRegisterInfo *MRI) const override { + if (MI->getOpcode() == SystemZ::INSERT_SUBREG) { + if (ToFinal) + return selectLanesGeneric(MI, Lanes, Worklist, MRI); + const MachineOperand &DstMO = MI->getOperand(0); + const MachineOperand &Src1MO = MI->getOperand(1); + const MachineOperand &Src2MO = MI->getOperand(2); + unsigned DstLanes = Lanes[DstMO.getReg()]; + unsigned Src1Lanes = Lanes[Src1MO.getReg()]; + DstLanes &= Src1Lanes; + unsigned Src2Lanes = Lanes[Src2MO.getReg()] & (F1 | F3); + applyLaneDeps(Src2Lanes, F1, DstLanes, G0); + applyLaneDeps(Src2Lanes, F3, DstLanes, G1); + bool Change = updateLanes(DstMO.getReg(), DstLanes, Lanes, MRI, Worklist, MI); + Change |= updateLanes(Src1MO.getReg(), Src1Lanes, Lanes, MRI, Worklist, MI); + Change |= updateLanes(Src2MO.getReg(), Src2Lanes, Lanes, MRI, Worklist, MI); + return Change; + } + + return InstrConverterBase::findLanes(MI, Lanes, Worklist, ToFinal, MRI); + } + + bool convertInstr(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, + DenseMap &Lanes) override { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator Start = getPrevOrEnd(MI); + + if (MI->getOpcode() == SystemZ::RISBGN) { + // IGNORE the immediate operands and just do an AND with '1'... + Register LoadedImmReg = ImmLoader.loadImmIntoVecElt(MI, + lane2EltIdx(Lanes[MI->getOperand(0).getReg()]), + 1, ImmediateType::SE16); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::VN)) + .add(MI->getOperand(0)) + .add(MI->getOperand(2)) + .addReg(LoadedImmReg); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + if (MI->getOpcode() == SystemZ::INSERT_SUBREG) { + // Replace with a COPY. 
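+      // isLegal() only allows inserting the low 32 bits into an IMPLICIT_DEF,
+      // so once the value lives in a vector lane the INSERT_SUBREG reduces to
+      // a copy of the inserted operand.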
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY)) + .add(MI->getOperand(0)) + .add(MI->getOperand(2)); + LLVM_DEBUG(dumpConversion(MI, Start)); + return true; + } + + llvm_unreachable("Unhandled opcode"); + return false; + } +}; + +void sortMIs(const MachineDominatorTree *MDT, std::list &MIs) { + auto comp = [&MDT](const MachineInstr *A, const MachineInstr *B)-> bool { + return MDT->dominates(A, B) || A < B; }; + MIs.sort(comp); +} + +///// Closure: a set of connected virtual registers (edges) and instructions. +struct Closure { + /// Virtual registers in the closure. + DenseSet Edges; + + /// Instructions in the closure. + SmallPtrSet Instrs; + + /// Instructions in the closure that do not have a converter and therefore + /// would have to be inserted into a vector element with a VLVG. + SmallPtrSet Insertions; + + /// Instructions in the closure that have one or more users that cannot be + /// converted and therefore would have to be extracted into a scalar GPR + /// with a VLGV. + SmallPtrSet Extractions; + + /// A set of possible vector lanes for each reassigned register. + /// LSB represents lane (vector element) 0. + DenseMap Lanes; + + DenseSet FPLaneRegs; + + /// True if all enclosed instructions can legally be reassigned. + bool Legal; + + // Costs + unsigned MaxLoopDepth; // TODO: worth checking for separate loop nests? + unsigned DefsMinLoopDepth; + int TotalCost; + bool WillReassign; + bool HasCalls; + Closure() : Legal(true), MaxLoopDepth(0), DefsMinLoopDepth(~0), TotalCost(0), + WillReassign(false), HasCalls(false) {} + + using const_edge_iterator = DenseSet::const_iterator; + iterator_range edges() const { + return iterator_range(Edges.begin(), Edges.end()); + } + + bool allRegsInFPLane(MachineRegisterInfo *MRI) { + for (Register Reg : Edges) { + // These are special + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getOpcode() == SystemZ::VLGVF || + DefMI->getOpcode() == SystemZ::VLGVH || + DefMI->getOpcode() == SystemZ::VLGVB) + continue; + if (Lanes[Reg] != G0 && Lanes[Reg] != F0) + return false; + } + return true; + } + + LLVM_DUMP_METHOD void dump(const MachineDominatorTree *MDT = nullptr) const { + const MachineRegisterInfo *MRI = + &(*Instrs.begin())->getParent()->getParent()->getRegInfo(); + + dbgs() << "Registers: "; + bool First = true; + unsigned Max = 10; + for (Register Reg : Edges) { + if (!First) + dbgs() << ", "; + First = false; + if (!--Max) { + dbgs() << "..."; + break; + } + dbgs() << printReg(Reg, MRI->getTargetRegisterInfo(), 0, MRI); + } + dbgs() << "\n"; + if (MDT == nullptr) + return; + std::list MIs(Instrs.begin(), Instrs.end()); + for (MachineInstr *MI : Insertions) + MIs.push_back(MI); + sortMIs(MDT, MIs); + dbgs() << "Instructions:\n"; + for (MachineInstr *MI : MIs) { + if (Insertions.count(MI)) + dbgs() << "I "; + else if (Extractions.count(MI)) + dbgs() << "E "; + else + dbgs() << " "; + MI->print(dbgs()); + } + dbgs() << "\n"; + } + + std::string Opcodes; + std::set Reasons; + + void dump_reasons(const MachineRegisterInfo *MRI) const { + dbgs() << "Unable to reassign: "; + std::map Messages; + Messages[Reason::IllegalInsertion] = "IllegalInsertion"; + Messages[Reason::Address] = "Address"; + Messages[Reason::Offset] = "Offset"; + Messages[Reason::SubReg] = "SubReg"; + Messages[Reason::PhysRegIN] = "PhysRegIN"; + Messages[Reason::PhysRegOUT] = "PhysRegOUT"; + Messages[Reason::VLGVVarElt] = "VLGVVarElt"; + Messages[Reason::VLGVSearch] = "VLGVSearch"; + Messages[Reason::VLGVExt] = "VLGVExt"; + Messages[Reason::ImplicitReg] = 
"ImplicitReg"; + Messages[Reason::Cmp0NoExtract] = "Cmp0NoExtract"; + Messages[Reason::CCUsers] = "CCUsers"; + Messages[Reason::CCUserMask] = "CCUserMask"; + Messages[Reason::CmpImmediate] = "CmpImmediate"; + Messages[Reason::HasCalls] = "HasCalls"; + Messages[Reason::HasCallsInner] = "HasCallsInner"; + Messages[Reason::HasCallsDefBeforeLoop] = "HasCallsDefBeforeLoop"; + Messages[Reason::HasCallsLanes] = "HasCallsLanes"; + Messages[Reason::HasCallsPHIs] = "HasCallsPHIs"; + Messages[Reason::HasCallsUsers] = "HasCallsUsers"; + Messages[Reason::NumScalarInsertions] = "NumScalarInsertions"; + Messages[Reason::InsertionOnly] = "InsertionOnly"; + Messages[Reason::EndsWithInt2FP] = "EndsWithInt2FP"; + Messages[Reason::Lanes] = "Lanes"; + Messages[Reason::Extraction] = "Extraction"; + Messages[Reason::InsertVLGV] = "InsertVLGV"; + Messages[Reason::NoVLGV] = "NoVLGV"; + + for (auto R : Reasons) + dbgs() << Messages[R] << ", "; + if (Opcodes.length() > 0) + dbgs() << "(" << Opcodes << ") "; + dump(); + } +}; + +struct RegUses { + std::vector GPRUses; + std::vector AddrUses; + + RegUses(Register Reg, MachineRegisterInfo *MRI) { + for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) + for (unsigned I = 0, E = UseMI.getNumExplicitOperands(); I != E; ++I) { + MachineOperand &Op = UseMI.getOperand(I); + if (Op.isReg() && Op.getReg() == Reg) { + if (usedInAddress(&UseMI, I)) + AddrUses.push_back(&Op); + else + GPRUses.push_back(&Op); + } + } + } + + std::vector ScalarUses; + + RegUses(Register Reg, MachineRegisterInfo *MRI, Closure &C) + : RegUses(Reg, MRI) { + ScalarUses = AddrUses; + for (auto MO : GPRUses) + if (!C.Instrs.count(MO->getParent())) + ScalarUses.push_back(MO); + } +}; + +static void replaceRegForScalarUses(Register OrigScalReg, Register NewScalReg, + Closure &C, MachineRegisterInfo *MRI) { + RegUses UseOps(OrigScalReg, MRI, C); + for (auto MO : UseOps.ScalarUses) + MO->setReg(NewScalReg); +} + +// Insert a scalar value (the vreg defined by MI) into a vector element when +// there is no converter available. +static void insertReg(MachineInstr *MI, const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, Closure &C) { + MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::iterator InsPt = + MI->isPHI() ? MBB->getFirstNonPHI() : std::next(MI->getIterator()); + DebugLoc DL = MI->getDebugLoc(); + + // Replace the scalar register with a new one which will remain scalar, + // while OrigScalReg will be reassigned. + Register OrigScalReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(OrigScalReg); + if (RC == &SystemZ::GRX32BitRegClass) + RC = &SystemZ::GR32BitRegClass; + Register NewScalReg = MRI->createVirtualRegister(RC); + replaceRegForScalarUses(OrigScalReg, NewScalReg, C, MRI); + MI->getOperand(0).setReg(NewScalReg); + + unsigned EltIdx = lane2EltIdx(C.Lanes[OrigScalReg]); + unsigned VLVGOpc = is64BitReg(OrigScalReg, MRI) ? SystemZ::VLVGG + : SystemZ::VLVGF; + Register VTmp0 = MRI->createVirtualRegister(&SystemZ::VR128BitRegClass); + MachineBasicBlock::iterator Built_IMPLDEF = + BuildMI(*MBB, InsPt, DL, TII->get(TargetOpcode::IMPLICIT_DEF), VTmp0); + MachineBasicBlock::iterator Built_VLVG = + BuildMI(*MBB, InsPt, DL, TII->get(VLVGOpc), OrigScalReg) + .addReg(VTmp0) + .addReg(NewScalReg) + .addReg(SystemZ::NoRegister) + .addImm(EltIdx); + + LLVM_DEBUG(dbgs() << "From : "; MI->dump();); + LLVM_DEBUG(dbgs() << " Ins "; Built_IMPLDEF->dump();); + LLVM_DEBUG(dbgs() << " "; Built_VLVG->dump();); +} + +// Find a place to extract a reassigned value. 
Return true if extracting is +// ok, for instance outside of a loop, or false if it should be avoided (it's +// slow). + +// The liveness of a closure should reflect the extraction point. For now: +// only allow the case of extracting a value defined in a loop and extracted +// outside the loop, and let the closure be live all the way to the scalar +// user. +static bool findExtractionInsPt(MachineInstr *MI, + MachineRegisterInfo *MRI, + Closure &C, const MachineLoopInfo *MLI, + MachineBasicBlock *&InsMBB, + MachineBasicBlock::iterator &InsPt) { + if (DisableExtractions) + return false; + + Register DefReg = getDefedGPRReg(MI, MRI); + + // If MI is inside a loop, allow extraction outside of the loop. + // TODO: MI could also be reassigned before loop and extracted outside of it. + if (MachineLoop *ML = MLI->getLoopFor(MI->getParent())) { + RegUses UseOps(DefReg, MRI, C); + for (auto MO : UseOps.ScalarUses) + if (ML->contains(MO->getParent()->getParent())) + return false; + + InsMBB = ML->getExitBlock(); + if (InsMBB != nullptr) { + InsPt = InsMBB->begin(); + return true; + } + + return false; + } + + // TODO: Try to extract also in general cases. The + // VirtRegLiveness/LiveClosuresTracker should then reflect the point of the + // extraction point returned from here. + // Allow (slow) extraction if the user is not directly following. + // unsigned Count = 0; + // for (MachineBasicBlock::iterator II = MI->getIterator(), + // EE = MI->getParent()->end(); II != EE; II++) { + // if (II->isDebugInstr()) + // continue; + // if (II->isCall()) + // Count += 10; + // if (ScalUsers.count(&*II) || ++Count > 10) + // break; + // } + // if (Count > 10) { + // InsMBB = MI->getParent(); + // InsPt = std::next(MI->getIterator()); + // return true; + // } + return false; +} + +static void extractReg(MachineInstr *MI, + const SystemZInstrInfo *TII, + MachineRegisterInfo *MRI, Closure &C, + const MachineLoopInfo *MLI) { + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock *InsMBB; + MachineBasicBlock::iterator InsPt; + bool Success = findExtractionInsPt(MI, MRI, C, MLI, InsMBB, InsPt); + (void)Success; assert(Success && "Expected to find an extraction point."); + + Register DefReg = getDefedGPRReg(MI, MRI); + Register NewScalReg = MRI->createVirtualRegister(MRI->getRegClass(DefReg)); + replaceRegForScalarUses(DefReg, NewScalReg, C, MRI); + + unsigned EltIdx = lane2EltIdx(C.Lanes[DefReg]); + if (is64BitReg(DefReg, MRI)) { + MachineBasicBlock::iterator Built_VLGV = + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLGVG), NewScalReg) + .addReg(DefReg) + .addReg(SystemZ::NoRegister) + .addImm(EltIdx); + LLVM_DEBUG(dbgs() << "Extr "; Built_VLGV->dump();); + } else { + assert(isFullWordLane(C.Lanes[DefReg]) && "Extracting 8/16 bits?"); + Register TmpReg = MRI->createVirtualRegister(&SystemZ::GR64BitRegClass); + MachineBasicBlock::iterator Built_VLGV = + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::VLGVF), TmpReg) + .addReg(DefReg) + .addReg(SystemZ::NoRegister) + .addImm(EltIdx); + LLVM_DEBUG(dbgs() << "Extr "; Built_VLGV->dump();); + + MachineBasicBlock::iterator Built_COPY = + BuildMI(*InsMBB, InsPt, DL, TII->get(SystemZ::COPY), NewScalReg) + .addReg(TmpReg, 0, SystemZ::subreg_l32); + LLVM_DEBUG(dbgs() << "Extr "; Built_COPY->dump();); + } +} + +/// A class for traversing the function while keeping track of live virtual +/// registers and closures. Depends on Closures not being modified. 
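+/// Typical use (see runOnMachineFunction below): enterMBB() is called on
+/// entering a block, advance() after each instruction to update liveness,
+/// processCall()/findFPLanesAroundCall() at call sites, and pickOneClosure()
+/// when GPR pressure is high enough to make a reassignment worthwhile.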
+class LiveClosuresTracker { + std::vector &Closures; + VirtRegLiveness &VRLiveness; + std::map &Reg2ClosureIdx; + + struct Cmp { bool operator() (const Closure *A, const Closure *B) const { + if (A->TotalCost == B->TotalCost) + return A < B; + return A->TotalCost < B->TotalCost; + }}; + std::set LiveClosures; + std::map > ClosureLiveRegs; + + Closure* getRegClosure(Register Reg) { + if (Reg2ClosureIdx.find(Reg) == Reg2ClosureIdx.end()) + return nullptr; + unsigned Idx = Reg2ClosureIdx[Reg]; + return &Closures[Idx]; + } + + void addLiveClosureReg(Closure *C, Register Reg) { + LiveClosures.insert(C); + ClosureLiveRegs[C].insert(Reg); + } + + bool willReassignReg(Register Reg) { + if (Closure *C = getRegClosure(Reg)) + return C->WillReassign; + return false; + } + + unsigned MBBLoopDepth; +public: + LiveClosuresTracker(std::vector &C, VirtRegLiveness &VRL, + std::map &R2CIdx) + : Closures(C), VRLiveness(VRL), Reg2ClosureIdx(R2CIdx), MBBLoopDepth(0), + NumLiveVecRegs(0) {} + + std::set LiveGPRs; + unsigned NumLiveVecRegs; + + void enterMBB(MachineBasicBlock *MBB, const MachineLoopInfo *MLI, + bool IsolateLoops, bool Dump = true); + void advance(const MachineInstr *MI); + void processCall(const MachineInstr *MI, MachineRegisterInfo *MRI); + void findFPLanesAroundCall(MachineInstr *MI, MachineRegisterInfo *MRI, + const SystemZInstrInfo *TII, const MachineLoopInfo *MLI); + + LLVM_DUMP_METHOD void dumpRegUsage() const; + void pickOneClosure(std::set &AlreadyDumped, + const MachineRegisterInfo *MRI); +}; + +void LiveClosuresTracker::enterMBB(MachineBasicBlock *MBB, + const MachineLoopInfo *MLI, + bool IsolateLoops, bool Dump) { + const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); + MBBLoopDepth = MLI->getLoopDepth(MBB); + + for (Register Reg : VRLiveness.VRegLiveIns[MBB]) { + if (IsolateLoops && VRLiveness.isLoopLiveThroughNotUsed(Reg, MBB, MLI)) + continue; + if (Closure *C = getRegClosure(Reg)) + addLiveClosureReg(C, Reg); + if (isVecReg(Reg, MRI) || willReassignReg(Reg)) + NumLiveVecRegs++; + else if (isAnyGPR(Reg, MRI)) + LiveGPRs.insert(Reg); + } + + LLVM_DEBUG(if (Dump) { + dbgs() << "Entering bb." << MBB->getNumber() << "." + << MBB->getName() << "\n"; + dumpRegUsage(); + if (!LiveClosures.empty() && FullDumpDomainReass) { + dbgs() << "Live closures:\n"; + for (auto *C : LiveClosures) + C->dump(); + } + }); +} + +void LiveClosuresTracker::advance(const MachineInstr *MI) { + const MachineRegisterInfo *MRI = &MI->getParent()->getParent()->getRegInfo(); + if (MI->isDebugInstr()) + return; + + std::set SeenRegUses; + for (const MachineOperand &MO : MI->uses()) + // if (MO.isReg() && MO.isKill()) { + if (MO.isReg() && SeenRegUses.insert(MO.getReg()).second && + VRLiveness.regKilledByMI(MO.getReg(), MI)) { + if (Closure *C = getRegClosure(MO.getReg())) { + assert(LiveClosures.count(C) && "Closure was live."); + assert(ClosureLiveRegs[C].count(MO.getReg()) && "Enclosed reg was live."); + ClosureLiveRegs[C].erase(MO.getReg()); + if (ClosureLiveRegs[C].empty()) + LiveClosures.erase(C); + } + if (isVecReg(MO.getReg(), MRI) || willReassignReg(MO.getReg())) + NumLiveVecRegs = NumLiveVecRegs ? 
NumLiveVecRegs - 1 : 0; + else + LiveGPRs.erase(MO.getReg()); + } + + for (const MachineOperand &MO : MI->defs()) { + if (!Register::isVirtualRegister(MO.getReg())) + continue; + if (Closure *C = getRegClosure(MO.getReg())) + addLiveClosureReg(C, MO.getReg()); + if (isVecReg(MO.getReg(), MRI) || willReassignReg(MO.getReg())) + NumLiveVecRegs++; + else if (isAnyGPR(MO.getReg(), MRI)) + LiveGPRs.insert(MO.getReg()); + } +} + +// Check for a call inside live closures. TODO: INLINEASM physreg clobbers? +void LiveClosuresTracker:: +processCall(const MachineInstr *MI, MachineRegisterInfo *MRI) { + assert(MI->isCall() && "MI is not a call instruction?"); + for (auto *C : LiveClosures) { + C->HasCalls = true; + if (MBBLoopDepth > C->MaxLoopDepth) { + // (experimental) Save FP-lanes for closures inside the loop instead. + C->Reasons.insert(Reason::HasCallsInner); + C->Legal = false; + } + if (MBBLoopDepth > C->DefsMinLoopDepth) { + // Def is before loop containing call. + C->Reasons.insert(Reason::HasCallsDefBeforeLoop); + C->Legal = false; + } + if (!C->allRegsInFPLane(MRI)) { + C->Reasons.insert(Reason::HasCallsLanes); + C->Legal = false; + } + for (auto Reg : ClosureLiveRegs[C]) { + // Currently PHI nodes are only supported in simple cases. + unsigned PHIUses = 0; + for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) + if (UseMI.isPHI() && ++PHIUses > 1) { + C->Reasons.insert(Reason::HasCallsPHIs); + C->Legal = false; + } + if (!MultipleUsersCalls && !MRI->hasOneNonDBGUse(Reg)) { + C->Reasons.insert(Reason::HasCallsUsers); + C->Legal = false; + } + } + } +} + +void LiveClosuresTracker:: +findFPLanesAroundCall(MachineInstr *MI, MachineRegisterInfo *MRI, + const SystemZInstrInfo *TII, const MachineLoopInfo *MLI) { + assert(MI->isCall()); + for (auto *C : LiveClosures) + if (C->WillReassign && C->MaxLoopDepth == MBBLoopDepth) { + assert(C->HasCalls && "HasCalls not set?"); + for (auto Reg : ClosureLiveRegs[C]) { + C->FPLaneRegs.insert(Reg); + } + } +} + +void LiveClosuresTracker::dumpRegUsage() const { + LLVM_DEBUG( dbgs() << "Live GPRs: " << LiveGPRs.size() + << ", Live vector regs: " << NumLiveVecRegs << ".\n";); +} + +void LiveClosuresTracker::pickOneClosure(std::set &AlreadyDumped, + const MachineRegisterInfo *MRI) { + for (auto *C : LiveClosures) { + if (!C->WillReassign && C->Legal && MBBLoopDepth == C->MaxLoopDepth) { + if (C->HasCalls && (NumLiveVecRegs > VecSavedLim)) + continue; + LLVM_DEBUG( dumpRegUsage();); + LLVM_DEBUG(dbgs() << "--- Picked closure for reassignment: ";); + LLVM_DEBUG(C->dump()); + C->WillReassign = true; + for (Register Reg : C->Edges) + if (LiveGPRs.erase(Reg)) + NumLiveVecRegs++; + LLVM_DEBUG( dumpRegUsage();); + return; + } + } + + // Dump closures that were live but could not be reassigned when none was + // found. Skip Closures in blocks outside of their loops. + for (auto *C : LiveClosures) { + if (!C->WillReassign && MBBLoopDepth == C->MaxLoopDepth && + AlreadyDumped.insert(C).second) + LLVM_DEBUG( C->dump_reasons(MRI)); + } +} + +class SystemZDomainReassignment : public MachineFunctionPass { + const SystemZSubtarget *STI = nullptr; + MachineRegisterInfo *MRI = nullptr; + const SystemZInstrInfo *TII = nullptr; + const MachineDominatorTree *MDT = nullptr; + const MachineLoopInfo *MLI = nullptr; + + /// A map of available instruction converters. Since the only destination + /// domain is vector, a converter is identified by the source opcode. 
+ DenseMap> Converters; + + bool hasConverter(unsigned Opcode) const { + return Converters.find(Opcode) != Converters.end(); + } + + typedef const std::unique_ptr& ConvPtrRef; + ConvPtrRef findConverter(unsigned Opcode) const { + assert(hasConverter(Opcode) && "Expected to find a converter."); + return Converters.find(Opcode)->second; + } + + bool isConvertible(const MachineInstr *MI, + const SystemZInstrInfo *TII, + const MachineRegisterInfo *MRI, + std::set &Reasons) { + return Converters.find(MI->getOpcode()) != Converters.end() && + Converters[MI->getOpcode()]->isLegal(MI, TII, MRI, Reasons); + } + + /// Initialize Converters map. + void initConverters(ElementImmLoader &ImmLoader); + + /// Return true if this register is a candidate for GPR->Vector reassignment. + bool isGPRDomain(Register Reg) const { return isGPRDomainReg(Reg, MRI); } + + /// Starting from \Reg, expand the closure as much as possible. + void buildClosure(Closure &, Register Reg); + + /// Iterate over the closure and find the vector lanes for all registers. + void findVectorLanes(Closure &C, bool ToFinal, const MachineRegisterInfo *MRI); + + /// Reassign the closure to the vector domain. + void reassign(Closure &C) const; + + /// Compute the sum of conversion costs for the Closure. + void computeClosureCost(Closure &C, const MachineLoopInfo *MLI); + + /// Scan Reg from definition to users and collect information. + void scanRegister(Register Reg, unsigned &Calls); + +public: + static char ID; + + SystemZDomainReassignment() : MachineFunctionPass(ID) { + initializeSystemZDomainReassignmentPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return SYSTEMZ_DOMAINREASSIGN_NAME; } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +char SystemZDomainReassignment::ID = 0; + +} // End anonymous namespace. + +INITIALIZE_PASS(SystemZDomainReassignment, "systemz-domain-reassignment", + SYSTEMZ_DOMAINREASSIGN_NAME, false, false) + +/// Returns an instance of the Domain Reassignment pass. +FunctionPass *llvm:: +createSystemZDomainReassignmentPass(SystemZTargetMachine &TM) { + return new SystemZDomainReassignment(); +} + +void SystemZDomainReassignment::initConverters(ElementImmLoader &ImmLoader) { + // Define some utility functions for building the Converters map. 
+ auto createPseudoConverter = [&](unsigned Opc) { + Converters[Opc] = std::make_unique(Opc); + }; + auto createCOPYConverter = [&]() { + Converters[SystemZ::COPY] = std::make_unique(); + }; + auto createVLGVConverter = [&](unsigned Opc) { + Converters[Opc] = std::make_unique(Opc); + }; + auto createImmLoadConverter = [&](unsigned From, ImmediateType ImmT) { + Converters[From] = std::make_unique(From, ImmT, ImmLoader); + }; + auto createReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createShiftReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createLoadReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createStoreReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createSExtLoadReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createZExtLoadReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createRegExtReplacer = [&](unsigned From, unsigned To, unsigned LowOpc) { + Converters[From] = std::make_unique(From, To, LowOpc); + }; + auto createRegMemReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createRegImmReplacer = [&](unsigned From, unsigned To, + ImmediateType ImmT) { + Converters[From] = std::make_unique(From, To, ImmT, + ImmLoader); + }; + auto createMulReplacer = [&](unsigned From, unsigned To, ImmediateType ImmT) { + Converters[From] = std::make_unique(From, To, ImmT, ImmLoader); + }; + auto createFPIntConvReplacer = [&](unsigned From, unsigned To) { + Converters[From] = std::make_unique(From, To); + }; + auto createCompareReplacer = [&](unsigned From, unsigned To, + ImmediateType ImmT) { + Converters[From] = std::make_unique(From, To, ImmT, + ImmLoader); + }; + auto createLoadAddressReplacer = [&](unsigned From) { + Converters[From] = std::make_unique(From, ImmLoader); + }; + auto createExperimentalReplacer = [&](unsigned From) { + Converters[From] = std::make_unique(From, ImmLoader); + }; + + // Pseudo converters. + createPseudoConverter(SystemZ::PHI); + createPseudoConverter(SystemZ::IMPLICIT_DEF); + createCOPYConverter(); + + // Loads and stores + createLoadReplacer(SystemZ::LG, SystemZ::VLEG); + createLoadReplacer(SystemZ::LMux, SystemZ::VLEF); + createStoreReplacer(SystemZ::STG, SystemZ::VSTEG); + createStoreReplacer(SystemZ::STMux, SystemZ::VSTEF); + createStoreReplacer(SystemZ::ST, SystemZ::VSTEF); + createStoreReplacer(SystemZ::STHMux, SystemZ::VSTEH); + createStoreReplacer(SystemZ::STH, SystemZ::VSTEH); + createStoreReplacer(SystemZ::STCMux, SystemZ::VSTEB); + createStoreReplacer(SystemZ::STC, SystemZ::VSTEB); + // VSTER (z15, rare)? + + createZExtLoadReplacer(SystemZ::LLGF, SystemZ::VLLEZF); + createZExtLoadReplacer(SystemZ::LLGH, SystemZ::VLLEZH); + createZExtLoadReplacer(SystemZ::LLGC, SystemZ::VLLEZB); + createZExtLoadReplacer(SystemZ::LLHMux, SystemZ::VLLEZH); + createZExtLoadReplacer(SystemZ::LLCMux, SystemZ::VLLEZB); + + if (EnableMemExt) { + // Extensions from memory, which use one extra sequential instruction. + createSExtLoadReplacer(SystemZ::LGF, SystemZ::VUPHF); + createSExtLoadReplacer(SystemZ::LHMux, SystemZ::VUPHH); + } + + // Vector element extractions. 
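+  // Converting a VLGV keeps its result in the vector domain, so when all of
+  // its users are reassigned the GPR element extraction can simply go away.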
+ if (EnableVLGVs || ForceVLGV || EnableAll) { + createVLGVConverter(SystemZ::VLGVG); + createVLGVConverter(SystemZ::VLGVF); + createVLGVConverter(SystemZ::VLGVH); + createVLGVConverter(SystemZ::VLGVB); + } + + // Register with register instructions + if (EnableRegReg || EnableAll) { + createReplacer(SystemZ::AGRK, SystemZ::VAG); + createReplacer(SystemZ::ARK, SystemZ::VAF); + createReplacer(SystemZ::SGRK, SystemZ::VSG); + createReplacer(SystemZ::SRK, SystemZ::VSF); + createReplacer(SystemZ::LCGR, SystemZ::VLCG); + createReplacer(SystemZ::LCR, SystemZ::VLCF); + createReplacer(SystemZ::NRK, SystemZ::VN); + createReplacer(SystemZ::ORK, SystemZ::VO); + createReplacer(SystemZ::XRK, SystemZ::VX); + createReplacer(SystemZ::NGRK, SystemZ::VN); + createReplacer(SystemZ::OGRK, SystemZ::VO); + createReplacer(SystemZ::XGRK, SystemZ::VX); + createMulReplacer(SystemZ::MSRKC, SystemZ::VMEF, NoImmTy); + } + + // Shifts + if (EnableShifts || EnableAll) { + createShiftReplacer(SystemZ::SLLG, SystemZ::VESLG); + createShiftReplacer(SystemZ::SLLK, SystemZ::VESLF); + createShiftReplacer(SystemZ::SRLG, SystemZ::VESRLG); + createShiftReplacer(SystemZ::SRLK, SystemZ::VESRLF); + createShiftReplacer(SystemZ::SRAG, SystemZ::VESRAG); + createShiftReplacer(SystemZ::SRAK, SystemZ::VESRAF); + } + + // Immediate loads + if (EnableImmLoads || EnableAll) { + createImmLoadConverter(SystemZ::LGHI, SE16); + createImmLoadConverter(SystemZ::LHIMux, SE16); + createImmLoadConverter(SystemZ::LLILL, ZE16); + createImmLoadConverter(SystemZ::LLILH, ZELH16); + createImmLoadConverter(SystemZ::LLILF, ZE32); + createImmLoadConverter(SystemZ::LGFI, SE32); + createImmLoadConverter(SystemZ::IIFMux, UInt32); + } + + // Extensions of register + if (EnableRegExt || EnableAll) { + createRegExtReplacer(SystemZ::LGFR, SystemZ::VUPHF, SystemZ::VUPLF); + createRegExtReplacer(SystemZ::LLGFR, SystemZ::VUPLHF, SystemZ::VUPLLF); + createRegExtReplacer(SystemZ::LHR, SystemZ::VUPHH, SystemZ::VUPLHW); + createRegExtReplacer(SystemZ::LLHRMux, SystemZ::VUPLHH, SystemZ::VUPLLH); + } + + // Register with memory instructions + if (EnableRegMem || EnableAll) { + createRegMemReplacer(SystemZ::AG, SystemZ::VAG); + createRegMemReplacer(SystemZ::A, SystemZ::VAF); + createRegMemReplacer(SystemZ::SG, SystemZ::VSG); + createRegMemReplacer(SystemZ::S, SystemZ::VSF); + createRegMemReplacer(SystemZ::N, SystemZ::VN); + createRegMemReplacer(SystemZ::NG, SystemZ::VN); + createRegMemReplacer(SystemZ::O, SystemZ::VO); + createRegMemReplacer(SystemZ::OG, SystemZ::VO); + createRegMemReplacer(SystemZ::X, SystemZ::VX); + createRegMemReplacer(SystemZ::XG, SystemZ::VX); + createMulReplacer(SystemZ::MS, SystemZ::VMEF, NoImmTy); + } + + // Register with immediate instructions + if (EnableRegImm || EnableAll) { + createRegImmReplacer(SystemZ::AGHIK, SystemZ::VAG, SE16); + createRegImmReplacer(SystemZ::AHIMuxK, SystemZ::VAF, SE16); + createRegImmReplacer(SystemZ::AFIMux, SystemZ::VAF, SInt32); + createRegImmReplacer(SystemZ::OILMux, SystemZ::VO, ZE16); + createRegImmReplacer(SystemZ::OILL64, SystemZ::VO, ZE16); + createRegImmReplacer(SystemZ::NILMux, SystemZ::VN, ANDLow16); + createRegImmReplacer(SystemZ::NILL64, SystemZ::VN, ANDLow16); + createRegImmReplacer(SystemZ::NIFMux, SystemZ::VN, UInt32); + createRegImmReplacer(SystemZ::XIFMux, SystemZ::VX, UInt32); + // XXX: XILF64 + XILHF64 (-1) -> VLEIG + VX + createMulReplacer(SystemZ::MHI, SystemZ::VMEF, SE16); + createMulReplacer(SystemZ::MSFI, SystemZ::VMEF, SInt32); + } + + // Integer to FP conversions + if (EnableFPConv || EnableAll) 
{ + createFPIntConvReplacer(SystemZ::CDGBR, SystemZ::WCDGB); + createFPIntConvReplacer(SystemZ::CDLGBR, SystemZ::WCDLGB); + if (STI->hasVectorEnhancements2()) { + createFPIntConvReplacer(SystemZ::CEFBR, SystemZ::WCEFB); + createFPIntConvReplacer(SystemZ::CELFBR, SystemZ::WCELFB); + } + + // FP to integer conversions + // TODO: Conversions with an extended/truncated result? + createFPIntConvReplacer(SystemZ::CGDBR, SystemZ::WCGDB); + createFPIntConvReplacer(SystemZ::CLGDBR, SystemZ::WCLGDB); + if (STI->hasVectorEnhancements2()) { + createFPIntConvReplacer(SystemZ::CFEBR, SystemZ::WCFEB); + createFPIntConvReplacer(SystemZ::CLFEBR, SystemZ::WCLFEB); + } + } + + // Comparisons. (Comparisons/TM reassigned to VTM did not seem to improve + // things - probably because VTM is slow. Therefore now reassigning to VEC + // instead). + if (EnableCompares || EnableAll) { + createCompareReplacer(SystemZ::CGR, SystemZ::VECG, NoImmTy); + createCompareReplacer(SystemZ::CR, SystemZ::VECF, NoImmTy); + createCompareReplacer(SystemZ::CG, SystemZ::VECG, NoImmTy); + createCompareReplacer(SystemZ::CMux, SystemZ::VECF, NoImmTy); + createCompareReplacer(SystemZ::CGHI, SystemZ::VECG, SE16); + createCompareReplacer(SystemZ::CHIMux, SystemZ::VECF, SE16); + + createCompareReplacer(SystemZ::CLGR, SystemZ::VECLG, NoImmTy); + createCompareReplacer(SystemZ::CLR, SystemZ::VECLF, NoImmTy); + createCompareReplacer(SystemZ::CLG, SystemZ::VECLG, NoImmTy); + createCompareReplacer(SystemZ::CLMux, SystemZ::VECLF, NoImmTy); + createCompareReplacer(SystemZ::CLGFI, SystemZ::VECLG, ZE32); + createCompareReplacer(SystemZ::CLFIMux, SystemZ::VECLF, UInt32); + } + + // Load Address + if (EnableLoadAddress || EnableAll) { + createLoadAddressReplacer(SystemZ::LA); + createLoadAddressReplacer(SystemZ::LAY); + } + + if (Experimental) { + createExperimentalReplacer(SystemZ::RISBGN); + createExperimentalReplacer(SystemZ::INSERT_SUBREG); + } +} + +void SystemZDomainReassignment::buildClosure(Closure &C, Register Reg) { + SmallPtrSet SeenInstrs; + SmallVector Worklist; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + assert(DefMI && "Expected a def of virt reg."); + Worklist.push_back(DefMI); + while (!Worklist.empty()) { + MachineInstr *CurrMI = Worklist.pop_back_val(); + if (!SeenInstrs.insert(CurrMI).second) + continue; + + if (isConvertible(CurrMI, TII, MRI, C.Reasons)) { + LLVM_DEBUG(if (C.Instrs.empty() && FullDumpDomainReass) + dbgs() << "--- Calculating closure beginning with " << + "virtual register %" << Register::virtReg2Index(Reg) << ". ---\n";); + + C.Instrs.insert(CurrMI); + // Add defining instructions of use-operands to Worklist. + for (unsigned OpIdx = 0; OpIdx < CurrMI->getNumExplicitOperands(); + ++OpIdx) { + auto &Op = CurrMI->getOperand(OpIdx); + if (Op.isReg() && Op.isUse() && Op.getReg() && + isGPRDomain(Op.getReg()) && !usedInAddress(CurrMI, OpIdx)) { + MachineInstr *UseDefMI = MRI->getVRegDef(Op.getReg()); + assert(UseDefMI && "Expected a def of virt reg."); + Worklist.push_back(UseDefMI); + } + assert((!Op.isReg() || !Op.getReg() || + Register::isVirtualRegister(Op.getReg()) || CurrMI->isCopy()) && + "Expected only a COPY to use/define a phys-reg explicitly."); + } + } + else { + if (C.Instrs.empty()) + return; // Early exit if trying to insert first instruction. + C.Opcodes += TII->getName(CurrMI->getOpcode()).str() + " "; + C.Insertions.insert(CurrMI); + } + + // If CurrMI defines a register, insert it into closure and add users to + // Worklist. 
+    if (Register DefReg = getDefedGPRReg(CurrMI, MRI)) {
+      C.Edges.insert(DefReg);
+      RegUses UseOps(DefReg, MRI);
+
+      for (auto MO : UseOps.GPRUses) {
+        if (isConvertible(MO->getParent(), TII, MRI, C.Reasons))
+          Worklist.push_back(MO->getParent());
+        else if (C.Instrs.count(CurrMI)) {
+          C.Extractions.insert(CurrMI);
+          C.Opcodes += TII->getName(MO->getParent()->getOpcode()).str() + " ";
+        }
+      }
+
+      if (!UseOps.AddrUses.empty() && C.Instrs.count(CurrMI)) {
+        C.Extractions.insert(CurrMI);
+        C.Reasons.insert(Reason::Address);
+      }
+    }
+  } // Worklist
+
+  /// Find the vector lane to use for each reassigned register.
+  // Initialize each register to live in any lane.
+  for (Register Reg : C.Edges)
+    C.Lanes[Reg] = is64BitReg(Reg, MRI) ? (G0 | G1) : (F0 | F1 | F2 | F3);
+  // First iteration finds all possible lanes for each register.
+  LLVM_DEBUG(if (FullDumpDomainReass)
+               dbgs() << "--- Constraining vector lanes:\n";);
+  findVectorLanes(C, false/*ToFinal*/, MRI);
+  // Second iteration decides on the lane to use for each register.
+  if (C.Legal) {
+    LLVM_DEBUG(if (FullDumpDomainReass)
+                 dbgs() << "--- Selecting vector lanes:\n";);
+    findVectorLanes(C, true/*ToFinal*/, MRI);
+  }
+
+  LLVM_DEBUG(if (!C.Legal) {
+      dbgs() << "--- Invalidated Closure:\n";
+      C.dump(MDT);});
+}
+
+void SystemZDomainReassignment::findVectorLanes(Closure &C, bool ToFinal,
+                                            const MachineRegisterInfo *MRI) {
+  bool Change = true;
+  while (Change) {
+    Change = false;
+    for (MachineInstr *MI : C.Instrs) {
+      std::list WList;
+      WList.push_back(MI);
+      while (!WList.empty()) {
+        MachineInstr *CurrMI = WList.front();
+        WList.pop_front();
+        if (!C.Instrs.count(CurrMI))
+          continue;
+
+        if (findConverter(CurrMI->getOpcode())
+              ->findLanes(CurrMI, C.Lanes, WList, ToFinal, MRI))
+          Change = true;
+
+        // Check that all operands have at least one possible lane.
+        for (auto &Op : CurrMI->explicit_operands())
+          if (Op.isReg() && C.Lanes.find(Op.getReg()) != C.Lanes.end() &&
+              !C.Lanes[Op.getReg()]) {
+            C.Legal = false;
+            C.Reasons.insert(Reason::Lanes);
+            assert(!ToFinal && "Could not select lane.");
+            LLVM_DEBUG(dbgs() << "No lanes:"; CurrMI->dump(););
+            return;
+          }
+      }
+    }
+  }
+}
+
+void SystemZDomainReassignment::reassign(Closure &C) const {
+  assert(C.Legal && "Cannot convert illegal closure");
+
+  // Do insertions and extractions before conversions in order to be able to
+  // find the original scalar users.
+  for (auto *MI : C.Insertions)
+    insertReg(MI, TII, MRI, C);
+
+  for (auto *MI : C.Extractions)
+    extractReg(MI, TII, MRI, C, MLI);
+
+  // Iterate all instructions in the closure, convert each one using the
+  // appropriate converter.
+  SmallVector ToErase;
+  for (auto *MI : C.Instrs)
+    if (findConverter(MI->getOpcode())->convertInstr(MI, TII, MRI, C.Lanes))
+      ToErase.push_back(MI);
+
+  // Iterate all registers in the closure, replace them with registers in the
+  // destination domain.
+  for (Register Reg : C.edges()) {
+    for (auto &MO : MRI->use_operands(Reg)) {
+      if (!MO.getSubReg())
+        continue;
+
+      // Allow COPY of subreg if it was inserted during conversion, for
+      // instance with a WCDGB that uses the subreg_h64 of the vector reg.
+      // These combinations of RC/SubReg should only occur as part of such
+      // a conversion.
+ const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); + unsigned SRIdx = MO.getSubReg(); + if (MO.getParent()->isCopy() && + ((RC == &SystemZ::GR64BitRegClass && SRIdx == SystemZ::subreg_h64) || + (RC == &SystemZ::GR32BitRegClass && SRIdx == SystemZ::subreg_h32))) + continue; + + // Remove all subregister references as they are not valid in the + // destination domain. + MO.setSubReg(0); + } + assert(isGPRDomain(Reg) && "Expected all regs in closure to be GPRs."); + MRI->setRegClass(Reg, &SystemZ::VR128BitRegClass); + } + +#ifndef NDEBUG + std::set RemovedEdges; + for (auto MI : ToErase) { + if (MI->getOpcode() == SystemZ::VLGVF || + MI->getOpcode() == SystemZ::VLGVH || + MI->getOpcode() == SystemZ::VLGVB) + RemovedEdges.insert(MI->getOperand(0).getReg()); + } +#endif + + for (auto MI : ToErase) + MI->eraseFromParent(); + +#ifndef NDEBUG + // VLGVX is simply removed... + for (Register Reg : C.edges()) + assert((RemovedEdges.count(Reg) || + MRI->getVRegDef(Reg) != nullptr) && "Expected preserved edges."); +#endif + + // Constrain the FPLaneRegs. + auto insertFPRegIntoVecReg = [&](MachineBasicBlock &InsMBB, + MachineBasicBlock::iterator InsPt, + Register FPReg) { + DebugLoc DL; + Register VTmp1 = MRI->createVirtualRegister(&SystemZ::VF128SavedRegClass); + Register VTmp2 = MRI->createVirtualRegister(&SystemZ::VF128SavedRegClass); + BuildMI(InsMBB, InsPt, DL, TII->get(TargetOpcode::IMPLICIT_DEF), VTmp1); + BuildMI(InsMBB, InsPt, DL, TII->get(TargetOpcode::INSERT_SUBREG), VTmp2) + .addReg(VTmp1).addReg(FPReg).addImm(SystemZ::subreg_h64); + return VTmp2; + }; + + for (Register Reg : C.FPLaneRegs) { + std::vector Uses; + for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) + Uses.push_back(&UseMI); + MRI->setRegClass(Reg, &SystemZ::VF128SavedRegClass); + MachineInstr *DefMI = MRI->getVRegDef(Reg); + Register FPReg = MRI->createVirtualRegister(&SystemZ::FP64BitRegClass); + MachineBasicBlock::iterator InsPt = DefMI->isPHI() ? + DefMI->getParent()->getFirstNonPHI() : std::next(DefMI->getIterator()); + BuildMI(*DefMI->getParent(), InsPt, DefMI->getDebugLoc(), + TII->get(SystemZ::COPY), FPReg) + .addReg(Reg, 0, SystemZ::subreg_h64); + for (auto UseMI : Uses) { + if (UseMI->isPHI()) { + for (unsigned Idx = 1, EE = UseMI->getNumOperands(); Idx != EE; + Idx += 2) { + Register InReg = UseMI->getOperand(Idx).getReg(); + if (InReg == Reg) { + MachineBasicBlock *MBB = UseMI->getOperand(Idx + 1).getMBB(); + Register VecReg = + insertFPRegIntoVecReg(*MBB, MBB->getFirstTerminator(), FPReg); + UseMI->getOperand(Idx).setReg(VecReg); + } + } + } else { + Register VecReg = + insertFPRegIntoVecReg(*UseMI->getParent(), UseMI, FPReg); + for (auto &Op : UseMI->explicit_operands()) + if (Op.isReg() && Op.getReg() == Reg) + Op.setReg(VecReg); + } + } + } +} + +void SystemZDomainReassignment:: +computeClosureCost(Closure &C, const MachineLoopInfo *MLI) { + bool HasVLGV = false; + bool EndsWithInt2FP = false; + + for (auto *MI : C.Instrs) { + std::set RDummie; + assert(isConvertible(MI, TII, MRI, RDummie) == !C.Insertions.count(MI) && + "Expected to be able to convert."); + // Ignore costs outside of innermost loop of reassignment. 
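+    // (only instructions at the closure's maximum loop depth contribute to
+    // TotalCost)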
+ if (MLI->getLoopDepth(MI->getParent()) < C.MaxLoopDepth) + continue; + ConvPtrRef Converter = findConverter(MI->getOpcode()); + int Cost = Converter->getExtraCost(MI, MRI); + C.TotalCost += Cost; + HasVLGV |= Cost == VLGVReassignCost; + if (Converter->isInt2FP()) + EndsWithInt2FP = true; + } + + MachineBasicBlock *InsMBB; + MachineBasicBlock::iterator InsPt; + for (auto *MI : C.Extractions) + if (!findExtractionInsPt(MI, MRI, C, MLI, InsMBB, InsPt)) { + C.Reasons.insert(Reason::Extraction); + C.Legal = false; + } + + if (!HasVLGV && OnlyWithVLGV) { + C.Reasons.insert(Reason::NoVLGV); + C.Legal = false; + } + + if (HasVLGV && ForceVLGV && C.Legal) { + C.WillReassign = true; + LLVM_DEBUG( dbgs() << "Closure will be reassigned (VLGV).\n";); + return; + } + + unsigned NumInsertionsCounted = 0; + for (auto *MI : C.Insertions) { + // Skip inserting an extraction (it would need special handling for lanes). + if (MI->getOpcode() == SystemZ::VLGVG || MI->getOpcode() == SystemZ::VLGVF || + MI->getOpcode() == SystemZ::VLGVH || MI->getOpcode() == SystemZ::VLGVB) { + C.Reasons.insert(Reason::InsertVLGV); + C.Legal = false; + } + // Ignore costs outside of innermost loop of reassignment. + if (MLI->getLoopDepth(MI->getParent()) < C.MaxLoopDepth) + continue; + NumInsertionsCounted++; + } + C.TotalCost += NumInsertionsCounted; + if (NumInsertionsCounted > MaxScalarInsertions) { + C.Reasons.insert(Reason::NumScalarInsertions); + C.Legal = false; + } + if (NumInsertionsCounted && EndsWithInt2FP) { + C.Reasons.insert(Reason::EndsWithInt2FP); + C.Legal = false; + } + // TODO: Currently reassignment with inserted scalar register is done even + // when it (scalar reg) is still needed outside C - beneficial? + + LLVM_DEBUG(if (FullDumpDomainReass) { + if (C.TotalCost > 0 || C.Reasons.count(Reason::HasCalls)) + C.dump(); + if (C.TotalCost != 0) + dbgs() << "Total extra cost: " << C.TotalCost << "\n\n"; + if (C.Reasons.count(Reason::HasCalls)) + dbgs() << "Calls inside closure - will not be reassigned.\n\n"; + }); +} + +bool SystemZDomainReassignment::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + STI = &MF.getSubtarget(); + TII = static_cast(STI->getInstrInfo()); + MRI = &MF.getRegInfo(); + MDT = &getAnalysis(); + MLI = &getAnalysis(); + + if (skipFunction(MF.getFunction()) || !STI->hasVector() || DisableDomReass) + return false; + assert(MRI->isSSA() && "Expected MIR to be in SSA form"); + + VirtRegLiveness VRLiveness; + VRLiveness.compute_and_setkills(MRI, MDT, &MF); + if (DumpVRLiveness) + VRLiveness.dumpMF(&MF); + + LLVM_DEBUG(if (FullDumpDomainReass) { + dbgs() << "***** Machine Function before Domain Reassignment *****\n"; + MF.print(dbgs()); }); + + // All edges that are included in some closure + DenseSet EnclosedEdges; +#ifndef NDEBUG + std::set EnclosedInstrs; +#endif + + std::vector Closures; // All legal closures found. + std::map Reg2ClosureIdx; // Regs in legal (saved) closures. + ElementImmLoader ImmLoader(MF, TII, MLI); + initConverters(ImmLoader); + + // Go over all virtual registers and calculate a closure. + for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { + Register Reg = Register::index2VirtReg(Idx); + + // Skip uninteresting regs. + if (MRI->reg_nodbg_empty(Reg) || EnclosedEdges.count(Reg) || + !isGPRDomain(Reg)) + continue; + + // Calculate closure starting with Reg. 
+ Closure C; + buildClosure(C, Reg); + if (C.Instrs.empty() || + (C.Instrs.size() == 1 && + ((*C.Instrs.begin())->isImplicitDef() || (*C.Instrs.begin())->isPHI() || + (*C.Instrs.begin())->isInsertSubreg()))) + continue; + + // Keep track of all enclosed edges and instructions. + for (Register E : C.edges()) { + assert(!EnclosedEdges.count(E) && "Edge already in other closure."); + EnclosedEdges.insert(E); + } +#ifndef NDEBUG + for (MachineInstr *MI : C.Instrs) { + assert(!EnclosedInstrs.count(MI) && "Instruction already in other closure."); + EnclosedInstrs.insert(MI); + } +#endif + + // Find the max loop depth of any instruction in C. + // Find the min loop depth of any defining instruction in C: + for (MachineInstr *MI : C.Instrs) { + unsigned D = MLI->getLoopDepth(MI->getParent()); + C.MaxLoopDepth = std::max(C.MaxLoopDepth, D); + if (getDefedGPRReg(MI, MRI)) + C.DefsMinLoopDepth = std::min(C.DefsMinLoopDepth, D); + } + for (MachineInstr *MI : C.Insertions) { + unsigned D = MLI->getLoopDepth(MI->getParent()); + C.MaxLoopDepth = std::max(C.MaxLoopDepth, D); + if (getDefedGPRReg(MI, MRI)) + C.DefsMinLoopDepth = std::min(C.DefsMinLoopDepth, D); + } + + LLVM_DEBUG(if (C.Legal) { + dbgs() << "--- Legal closure found:\n"; + if (FullDumpDomainReass) + C.dump(MDT); + }); + // Collect all closures that can potentially be converted. + Closures.push_back(std::move(C)); + unsigned CIdx = Closures.size() - 1; + for (Register E : Closures.back().edges()) + Reg2ClosureIdx[E] = CIdx; + } + LLVM_DEBUG(dbgs() << "Total number of closures: " << Closures.size() << "\n\n";); + + // Check which closures cross any call. + for (auto &MBB : MF) { + LiveClosuresTracker LCT(Closures, VRLiveness, Reg2ClosureIdx); + LCT.enterMBB(&MBB, MLI, false/*IsolateLoops*/, false/*Dump*/); + for (const MachineInstr &MI : MBB) { + LCT.advance(&MI); + if (MI.isCall()) + LCT.processCall(&MI, MRI); + // TODO: Maybe add extra cost for reassigning in small blocks if extra + // instrs needed, like loading an immediate. (Could check for loop w/ + // preheader for immediate loads. + } + } + + // Compute the total cost for each Closure. + for (Closure &C : Closures) + computeClosureCost(C, MLI); + + // Iterate again over function and remove some closures that does not seem + // beneficial, like needing an insertion to then immediately store the reg. + // TODO: It is not yet clear which type of insertions are worth doing: + // simply rejecting closures that only insert and immediately use does not + // seem to be good. + + // Iterate over function and mark for reassignments where deemed beneficial. + std::set AlreadyDumped; + for (auto &MBB : MF) { + LiveClosuresTracker LCT(Closures, VRLiveness, Reg2ClosureIdx); + LCT.enterMBB(&MBB, MLI, true/*IsolateLoops*/); + for (const MachineInstr &MI : MBB) { + LCT.advance(&MI); + if (LCT.LiveGPRs.size() >= GPRLimit && LCT.NumLiveVecRegs < VecLimit) + LCT.pickOneClosure(AlreadyDumped, MRI); + } + LLVM_DEBUG( dbgs() << "\n";); + } + + // Handle reassigned edges across calls. + for (auto &MBB : MF) { + LiveClosuresTracker LCT(Closures, VRLiveness, Reg2ClosureIdx); + LCT.enterMBB(&MBB, MLI, true/*IsolateLoops*/, false/*Dump*/); + for (MachineInstr &MI : MBB) { + if (MI.isCall()) + LCT.findFPLanesAroundCall(&MI, MRI, TII, MLI); + LCT.advance(&MI); + } + } + + // Reassign profitable closures. + for (Closure &C : Closures) { + if (C.WillReassign) { + LLVM_DEBUG(dbgs() << "\n--- Reassigning closure: ---\n";); + LLVM_DEBUG(C.dump(MDT)); + reassign(C); + LLVM_DEBUG(dbgs() << "--- Closure reassigned. 
---\n";); + Changed = true; + } + } + + LLVM_DEBUG(if (FullDumpDomainReass) { + dbgs() << "***** Machine Function after Domain Reassignment *****\n"; + MF.print(dbgs()); }); + + // Todo: make this optional or remove? + MF.verify(this, "After SystemZ Domain reassignment."); + + return Changed; +} + + +// Findings: +// - fp2int conversion + store seems to be done in GPRs +// - LA seemed potentially interesting, but seems mostly used with compares. + +// - Not sure X86 is iterating correctly over the "use operands of the users" + +// - Overflow implications? Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -334,6 +334,23 @@ // handled. The compare instruction is *not* changed. bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const; + // Returns a reference to the MachineOperand of MI containing the CC mask. + MachineOperand& getCCMaskMO(MachineInstr *MI) const; + + // Same function but for const MachineInstrs. + const MachineOperand& getCCMaskMO(const MachineInstr *MI) const { + return getCCMaskMO(const_cast(MI)); + } + + // Find the users of CC defined by MBBI and put them in CCUsers. Return + // false if analysis failed (for instance in case of a live out CC). + bool findCCUsers(MachineBasicBlock::iterator MBBI, + SmallVector &CCUsers) const; + + // Same function but for const MachineInstrs. + bool findCCUsers(const MachineInstr *MI, + SmallVector &CCUsers) const; + // If Opcode is a LOAD opcode for with an associated LOAD AND TRAP // operation exists, returh the opcode for the latter, otherwise return 0. unsigned getLoadAndTrap(unsigned Opcode) const; Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1604,7 +1604,8 @@ LoadOpcode = SystemZ::VL64; StoreOpcode = SystemZ::VST64; } else if (RC == &SystemZ::VF128BitRegClass || - RC == &SystemZ::VR128BitRegClass) { + RC == &SystemZ::VR128BitRegClass || + RC == &SystemZ::VF128SavedRegClass) { // XXX save 64 bits... LoadOpcode = SystemZ::VL; StoreOpcode = SystemZ::VST; } else @@ -1829,9 +1830,35 @@ MBBI->getOperand(1).isReg() && !MBBI->mayLoad() && "Not a compare reg/reg."); + SmallVector CCUsers; + if (!findCCUsers(MBBI, CCUsers)) + return false; + + // Update all CC users. + for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + MachineOperand &CCMaskMO = getCCMaskMO(CCUsers[Idx]); + unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm()); + CCMaskMO.setImm(NewCCMask); + } + + return true; +} + +MachineOperand& SystemZInstrInfo::getCCMaskMO(MachineInstr *MI) const { + assert(MI->readsRegister(SystemZ::CC) && "Expected CC use"); + unsigned Flags = MI->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? 
+ 0 : MI->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = MI->getOperand(FirstOpNum + 1); + return CCMaskMO; +} + +bool SystemZInstrInfo:: +findCCUsers(MachineBasicBlock::iterator MBBI, + SmallVector &CCUsers) const { + assert(MBBI->definesRegister(SystemZ::CC) && "Expected CC def"); MachineBasicBlock *MBB = MBBI->getParent(); bool CCLive = true; - SmallVector CCUsers; for (MachineBasicBlock::iterator Itr = std::next(MBBI); Itr != MBB->end(); ++Itr) { if (Itr->readsRegister(SystemZ::CC)) { @@ -1852,17 +1879,17 @@ if (LiveRegs.contains(SystemZ::CC)) return false; } + return true; +} - // Update all CC users. - for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { - unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; - unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? - 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); - MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); - unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm()); - CCMaskMO.setImm(NewCCMask); - } - +bool SystemZInstrInfo:: +findCCUsers(const MachineInstr *MI, + SmallVector &CCUsers) const { + SmallVector CCUsers_tmp; + if (!findCCUsers(const_cast(MI), CCUsers_tmp)) + return false; + for (auto I : CCUsers_tmp) + CCUsers.push_back(I); return true; } Index: llvm/lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -75,25 +75,27 @@ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>; def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>; def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>; - } - // Load element immediate. - // - // We want these instructions to be used ahead of VLVG* where possible. - // However, VLVG* takes a variable BD-format index whereas VLEI takes - // a plain immediate index. This means that VLVG* has an extra "base" - // register operand and is 3 units more complex. Bumping the complexity - // of the VLEI* instructions by 4 means that they are strictly better - // than VLVG* in cases where both forms match. - let AddedComplexity = 4 in { - def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, - v128b, v128b, imm32sx16trunc, imm32zx4>; - def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, - v128h, v128h, imm32sx16trunc, imm32zx3>; - def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, - v128f, v128f, imm32sx16, imm32zx2>; - def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, - v128g, v128g, imm64sx16, imm32zx1>; + // Load element immediate. + // + // We want these instructions to be used ahead of VLVG* where possible. + // However, VLVG* takes a variable BD-format index whereas VLEI takes + // a plain immediate index. This means that VLVG* has an extra "base" + // register operand and is 3 units more complex. Bumping the complexity + // of the VLEI* instructions by 4 means that they are strictly better + // than VLVG* in cases where both forms match. + // isAsCheapAsAMove is false or MachineLICM will not hoist out of loop. + // XXX Try variant without the tied source. 
+ let AddedComplexity = 4, isAsCheapAsAMove = 0 in { + def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, + v128b, v128b, imm32sx16trunc, imm32zx4>; + def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, + v128h, v128h, imm32sx16trunc, imm32zx3>; + def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, + v128f, v128f, imm32sx16, imm32zx2>; + def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, + v128g, v128g, imm64sx16, imm32zx1>; + } } } Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -187,6 +187,48 @@ } } + // Base implementation does not seem to give COPY-hints when subregs are + // involved. The hack to constrain "fp-lane vector regs" across calls + // involves copying the fp-lane into a FP64 reg and then constraining the + // vector reg (uses) to VF128Saved. Those COPYs must not be coalesced since + // then the vector reg would be live across the call (shouldCoalesce() + // returns false for them). Now try to make those COPYs go away as identity + // COPYs. + if (VRM != nullptr) { + SmallSet FPLaneHints; + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) { + if (!Use.isCopy()) + continue; + const MachineOperand *VecOp = nullptr; + const MachineOperand *FPOp = nullptr; + for (auto &MO : Use.explicit_operands()) { + if (Register::isPhysicalRegister(MO.getReg())) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); + if (RC == &SystemZ::VF128SavedRegClass && + MO.getSubReg() == SystemZ::subreg_h64) + VecOp = &MO; + else if (RC == &SystemZ::FP64BitRegClass) + FPOp = &MO; + } + if (VecOp != nullptr && FPOp != nullptr) { + if (VirtReg == VecOp->getReg() && VRM->hasPhys(FPOp->getReg())) { + Register AssignedFPReg = VRM->getPhys(FPOp->getReg()); + if (Register VecPhysReg = getMatchingSuperReg(AssignedFPReg, + SystemZ::subreg_h64, &SystemZ::VF128SavedRegClass)) + FPLaneHints.insert(VecPhysReg); + } + else if (VirtReg == FPOp->getReg() && VRM->hasPhys(VecOp->getReg())) { + Register AssignedVecReg = VRM->getPhys(VecOp->getReg()); + FPLaneHints.insert(getSubReg(AssignedVecReg, SystemZ::subreg_h64)); + } + } + } + for (MCPhysReg OrderReg : Order) + if (FPLaneHints.count(OrderReg)) + Hints.push_back(OrderReg); + } + return BaseImplRetVal; } @@ -345,6 +387,15 @@ LiveIntervals &LIS) const { assert (MI->isCopy() && "Only expecting COPY instructions"); + const MachineRegisterInfo *MRI = &MI->getParent()->getParent()->getRegInfo(); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); + if ((MRI->getRegClass(DstReg) == &SystemZ::VF128SavedRegClass && + MRI->getRegClass(SrcReg) == &SystemZ::FP64BitRegClass) || + (MRI->getRegClass(DstReg) == &SystemZ::FP64BitRegClass && + MRI->getRegClass(SrcReg) == &SystemZ::VF128SavedRegClass)) + return false; + // Coalesce anything which is not a COPY involving a subreg to/from GR128. if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) && (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64))) Index: llvm/lib/Target/SystemZ/SystemZRegisterInfo.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -264,6 +264,11 @@ (sequence "V%u", 16, 31), (sequence "V%u", 8, 15))>; +// The vector registers which have the FP-reg part calle-saved. 
+def VF128Saved : RegisterClass<"SystemZ",
+                               [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+                               (sequence "V%u", 8, 15)>;
+
 // Attaches a ValueType to a register operand, to make the instruction
 // definitions easier.
 class TypedReg {
Index: llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -252,7 +252,28 @@
   return true;
 }
 
+// EXPERIMENTAL
+static cl::opt<bool>
+DomainCleanups("domain-cleanups", cl::init(false), cl::Hidden);
+
 void SystemZPassConfig::addPreRegAlloc() {
+  if (getOptLevel() != CodeGenOpt::None) {
+    addPass(createSystemZDomainReassignmentPass(getSystemZTargetMachine()));
+
+    if (DomainCleanups) {
+      // Hoist immediate loads out of loops. This also removes identical
+      // hoisted VLEI instructions.
+
+      // Unfortunately this seemed to degrade performance for some reason on
+      // several benchmarks when used with this patch, so it is disabled by
+      // default for now.
+      addPass(&EarlyMachineLICMID);
+
+      // Running this caused a regression on i557.xz_r (5%)
+      // addPass(&MachineCSEID);
+    }
+  }
+
   addPass(createSystemZCopyPhysRegsPass(getSystemZTargetMachine()));
 }
 
Index: llvm/test/CodeGen/SystemZ/buildvector-00.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/buildvector-00.ll
+++ llvm/test/CodeGen/SystemZ/buildvector-00.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-domreass | FileCheck %s
 
 ; Test that the dag combiner can understand that some vector operands are
 ; all-zeros and then optimize the logical operations.
Index: llvm/test/CodeGen/SystemZ/dag-combine-01.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/dag-combine-01.ll
+++ llvm/test/CodeGen/SystemZ/dag-combine-01.ll
@@ -2,7 +2,7 @@
 ; incorrectly drop a chain dependency to a store previously chained to
 ; one of two combined loads.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-domreass | FileCheck %s
 
 @A = common global [2048 x float] zeroinitializer, align 4
 
Index: llvm/test/CodeGen/SystemZ/dag-combine-03.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/dag-combine-03.ll
+++ llvm/test/CodeGen/SystemZ/dag-combine-03.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-domreass < %s | FileCheck %s
 
 ; Test that DAGCombiner gets helped by getKnownBitsForTargetNode() when
 ; BITCAST nodes are involved on a big-endian target.
Index: llvm/test/CodeGen/SystemZ/domain-reassignment-01.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/domain-reassignment-01.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \
+; RUN:   -verify-machineinstrs -domain-gprlim=0 -memext 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test domain reassignments of loads and stores.
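
The rejected cases below (fun1, fun2, fun5 and fun6) come down to addressing limits rather than opcode availability: VLEG/VSTEG and VLEF/VSTEF are VRX-format instructions with only a 12-bit unsigned displacement, while the GPR accesses they would replace allow a 20-bit signed displacement, so the 8000-byte getelementptr offset fits the GPR form but not the vector element form. A minimal sketch of such a legality check, assuming a helper of roughly this shape (the name is illustrative and not taken from the patch):

// Sketch only, not code from this patch: a closure is only convertible if
// every memory access in it can be rewritten as a VRX-format vector element
// access, which encodes a 12-bit unsigned displacement (0..4095 bytes),
// whereas the replaced GPR access may have used a 20-bit signed displacement.
// Requires llvm/Support/MathExtras.h for isUInt.
static bool displacementFitsVectorElement(int64_t Disp) {
  return isUInt<12>(static_cast<uint64_t>(Disp));
}
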
+ +define void @fun0(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VSTEG {{.*}}noreg, 0 + %Val = load i64, i64* %Src + store i64 %Val, i64* %Dst + ret void +} + +define void @fun1(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: --- offset +; CHECK-NOT: Legal closure found +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun1: IsSSA, TracksLiveness + %Ptr = getelementptr i64, i64* %Src, i64 1000 + %Val = load i64, i64* %Ptr + store i64 %Val, i64* %Dst + ret void +} + +define void @fun2(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: --- offset {{.*}} STG +; CHECK: Unable to reassign: Offset, Extraction{{.*}}STG + %Val = load i64, i64* %Src + %Ptr = getelementptr i64, i64* %Dst, i64 1000 + store i64 %Val, i64* %Ptr + ret void +} + +define i64 @fun3(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: --- physreg {{.*}} COPY +; CHECK: Unable to reassign: PhysRegOUT, Extraction{{.*}}COPY + %Val = load i64, i64* %Src + ret i64 %Val +} + +define void @fun4(i32* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VSTEF {{.*}}noreg, 0 + %Val = load i32, i32* %Src + store i32 %Val, i32* %Dst + ret void +} + +define void @fun5(i32* %Src, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun5: IsSSA, TracksLiveness +; CHECK: --- offset {{.*}} LMux +; CHECK-NOT: Legal closure found +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun5: IsSSA, TracksLiveness + %Ptr = getelementptr i32, i32* %Src, i32 2000 + %Val = load i32, i32* %Ptr + store i32 %Val, i32* %Dst + ret void +} + +define void @fun6(i32* %Src, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: --- offset {{.*}} STMux +; CHECK: Unable to reassign: Offset, Extraction{{.*}}STMux + %Val = load i32, i32* %Src + %Ptr = getelementptr i32, i32* %Dst, i32 2000 + store i32 %Val, i32* %Ptr + ret void +} + +define i32 @fun7(i32* %Src, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun7: IsSSA, TracksLiveness +; CHECK: --- physreg {{.*}} COPY +; CHECK: Unable to reassign: PhysRegOUT, Extraction{{.*}}COPY + %Val = load i32, i32* %Src + ret i32 %Val +} + +;;; Truncating stores + +define void @fun8(i32* %Src, i16* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun8: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: VSTEH killed [[REG0]]{{.*}}noreg, 1 + %Val = load i32, i32* %Src + %Res = trunc i32 %Val to i16 + store i16 %Res, i16* %Dst + ret void +} + +define void @fun9(i32* %Src, i8* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code 
for function fun9: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: VSTEB killed [[REG0]]{{.*}}noreg, 3 + %Val = load i32, i32* %Src + %Res = trunc i32 %Val to i8 + store i8 %Res, i8* %Dst + ret void +} + +define void @fun10(i64* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun10: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = COPY [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 1 + %Val = load i64, i64* %Src + %Res = trunc i64 %Val to i32 + store i32 %Res, i32* %Dst + ret void +} + +define void @fun11(i64* %Src, i16* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun11: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = COPY [[REG0]] +; CHECK-NEXT: VSTEH killed [[REG1]]{{.*}}noreg, 3 + %Val = load i64, i64* %Src + %Res = trunc i64 %Val to i16 + store i16 %Res, i16* %Dst + ret void +} + +define void @fun12(i64* %Src, i8* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun12: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = COPY [[REG0]] +; CHECK-NEXT: VSTEB killed [[REG1]]{{.*}}noreg, 7 + %Val = load i64, i64* %Src + %Res = trunc i64 %Val to i8 + store i8 %Res, i8* %Dst + ret void +} + +;;; sign extending loads (allows just one unpack) + +; LGF +define void @fun13(i32* %Src, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun13: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VUPHF [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %Val = load i32, i32* %Src + %Res = sext i32 %Val to i64 + store i64 %Res, i64* %Dst + ret void +} + +; LHMux +define void @fun14(i16* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun14: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEH {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VUPHH [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 0 + %Val = load i16, i16* %Src + %Res = sext i16 %Val to i32 + store i32 %Res, i32* %Dst + ret void +} + +;;; zero exteding loads (VLLEZ) + +; LLGF +define void @fun15(i32* %Src, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun15: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLLEZF +; CHECK-NEXT: VSTEG killed [[REG0]]{{.*}}noreg, 0 + %Val = load i32, i32* %Src + %Res = zext i32 %Val to i64 + store i64 %Res, i64* %Dst + ret void +} + +; LLGH +define void @fun16(i16* %Src, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun16: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLLEZH +; CHECK-NEXT: VSTEG killed [[REG0]]{{.*}}noreg, 0 + %Val = load i16, i16* %Src + %Res = zext i16 %Val to i64 + store i64 %Res, i64* %Dst + ret void +} + +; LLGC +define void @fun17(i8* %Src, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun17: IsSSA, TracksLiveness 
+; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLLEZB +; CHECK-NEXT: VSTEG killed [[REG0]]{{.*}}noreg, 0 + %Val = load i8, i8* %Src + %Res = zext i8 %Val to i64 + store i64 %Res, i64* %Dst + ret void +} + +; LLHMux +define void @fun18(i16* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun18: IsSSA, TracksLiveness +; CHECK: [[REG:%[0-9]+]]:vr128bit = VLLEZH +; CHECK-NEXT: VSTEF killed [[REG]]{{.*}}noreg, 1 + %Val = load i16, i16* %Src + %Res = zext i16 %Val to i32 + store i32 %Res, i32* %Dst + ret void +} + +; LLCMux +define void @fun19(i8* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun19: IsSSA, TracksLiveness +; CHECK: [[REG:%[0-9]+]]:vr128bit = VLLEZB +; CHECK-NEXT: VSTEF killed [[REG]]{{.*}}noreg, 1 + %Val = load i8, i8* %Src + %Res = zext i8 %Val to i32 + store i32 %Res, i32* %Dst + ret void +} + Index: llvm/test/CodeGen/SystemZ/domain-reassignment-02.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-02.ll @@ -0,0 +1,700 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments for arithmetic instructions + +; AG, AGRK +define void @fun0(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VAG +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %C = load volatile i64, i64* %Src2 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %Res = add i64 %Sum, %C + store i64 %Res, i64* %Dst + ret void +} + +define void @fun1(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: --- offset +; CHECK: Unable to reassign: Offset, Extraction{{.*}}AG +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness + %Ptr = getelementptr i64, i64* %Src0, i64 1000 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Ptr + %Sum = add i64 %LHS, %RHS + store i64 %Sum, i64* %Dst + ret void +} + +; A, ARK +define void @fun2(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VAF +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = add i32 %Sum, %C + store i32 %Res, i32* %Dst + ret void +} + +define void @fun3(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: Unable to reassign: Extraction{{.*}}AY +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness + %Ptr = getelementptr i32, i32* %Src0, i32 
2000 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Ptr + %Sum = add i32 %LHS, %RHS + store i32 %Sum, i32* %Dst + ret void +} + +; SG, SGRK +define void @fun4(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VSG +; CHECK: VSG +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %C = load volatile i64, i64* %Src2 + %RHS = load i64, i64* %Src1 + %Dff = sub i64 %LHS, %RHS + %Res = sub i64 %Dff, %C + store i64 %Res, i64* %Dst + ret void +} + +define void @fun5(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness +; CHECK: --- offset {{.*}} SG +; CHECK: Unable to reassign: Offset, Extraction{{.*}}SG +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness + %Ptr = getelementptr i64, i64* %Src0, i64 1000 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Ptr + %Dff = sub i64 %LHS, %RHS + store i64 %Dff, i64* %Dst + ret void +} + +; S, SRK +define void @fun6(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VSF +; CHECK: VSF +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Dff = sub i32 %LHS, %RHS + %Res = sub i32 %Dff, %C + store i32 %Res, i32* %Dst + ret void +} + +define void @fun7(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness +; CHECK: Unable to reassign: Extraction{{.*}}SY +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness + %Ptr = getelementptr i32, i32* %Src0, i32 2000 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Ptr + %Dff = sub i32 %LHS, %RHS + store i32 %Dff, i32* %Dst + ret void +} + +; MS; MSRKC +define void @fun8(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun8: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 0 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 1 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 0 +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VMEF [[REG0]], [[REG2]] +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VMOF killed [[REG3]], killed [[REG1]] +; CHECK: VSTEF killed [[REG4]]{{.*}}noreg, 1 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Prd = mul i32 %LHS, %RHS + %Res = mul i32 %Prd, %C + store i32 %Res, i32* %Dst + ret void +} + +define void @fun9(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun9: IsSSA, TracksLiveness +; CHECK: Unable to reassign: Extraction{{.*}}MSY +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun9: IsSSA, 
TracksLiveness + %Ptr = getelementptr i32, i32* %Src0, i32 2000 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Ptr + %Prd = mul i32 %LHS, %RHS + store i32 %Prd, i32* %Dst + ret void +} + +; AGHIK +define void @fun10(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun10: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VSG +; CHECK: VLEIG {{.*}}-16, 0 +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Sum = sub i64 %LHS, %RHS + %Res = add i64 %Sum, -16 + store i64 %Res, i64* %Dst + ret void +} + +; AHIMuxK +define void @fun11(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun11: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VLEIF {{.*}}-16, 0 +; CHECK: VAF +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = add i32 %Sum, -16 + store i32 %Res, i32* %Dst + ret void +} + +; AFIMux +define void @fun11b(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun11b: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VLEIH {{.*}} -17297, 1 +; CHECK: VLEIH {{.*}} 1, 0 +; CHECK: VAF +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = add i32 %Sum, 113775 + store i32 %Res, i32* %Dst + ret void +} + +; N +define void @fun12(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun12: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VN +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Res = and i32 %LHS, %RHS + store i32 %Res, i32* %Dst + ret void +} + +; NG +define void @fun13(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun13: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VN +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Res = and i64 %LHS, %RHS + store i64 %Res, i64* %Dst + ret void +} + +; O +define void @fun14(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun14: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VO +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Res = or i32 %LHS, %RHS + store i32 %Res, i32* %Dst + ret void +} + +; OG +define void @fun15(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun15: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VO +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Res = or i64 %LHS, %RHS + store i64 %Res, i64* %Dst + ret void +} + +; X +define void @fun16(i32* %Src0, i32* %Src1, i32* %Dst) { 
+; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun16: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VX +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Res = xor i32 %LHS, %RHS + store i32 %Res, i32* %Dst + ret void +} + +; XG +define void @fun17(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun17: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VX +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Res = xor i64 %LHS, %RHS + store i64 %Res, i64* %Dst + ret void +} + +; OILMux +define void @fun18(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun18: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VLEIF {{.*}} 1, 0 +; CHECK: VO +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = or i32 %Sum, 1 + store i32 %Res, i32* %Dst + ret void +} + +define void @fun19(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun19: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VLEIH {{.*}} -32768, 1 +; CHECK: VLEIH {{.*}} 0, 0 +; CHECK: VO +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = or i32 %Sum, 32768 + store i32 %Res, i32* %Dst + ret void +} + +; OILL64 +define void @fun20(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun20: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VLEIG {{.*}}, 1, 0 +; CHECK: VO +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %Res = or i64 %Sum, 1 + store i64 %Res, i64* %Dst + ret void +} + +; Immediate load with VLEIG 0 (Hi16 == 0, Lo16:15 == 1) +define void @fun20_b(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun20_b: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VLEIG {{.*}}, 0, 0 +; CHECK: VLEIH {{.*}}, -32752, 3 +; CHECK: VO +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %Res = or i64 %Sum, 32784 + store i64 %Res, i64* %Dst + ret void +} + +; NILMux +define void @fun21(i32* %Src0, i32* %Dst, i32* %Dst1) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun21: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK-NEXT: VSTEF +; CHECK: VLEIF {{.*}} -3, 0 +; CHECK: VN +; CHECK: VSTEF {{.*}}noreg, 0 + %i = load i32, i32* %Src0 + store i32 %i, i32* %Dst1 + %i5 = and i32 %i, -3 + store i32 %i5, i32* %Dst + ret void +} + +define void @fun22(i32* %Src0, i32* %Dst, i32* %Dst1) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code 
for function fun22: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK-NEXT: VSTEF +; CHECK: VLEIH {{.*}} 16, 1 +; CHECK: VLEIH {{.*}} -1, 0 +; CHECK: VN +; CHECK: VSTEF {{.*}}noreg, 0 + %i = load i32, i32* %Src0 + store i32 %i, i32* %Dst1 + %i5 = and i32 %i, -65520 + store i32 %i5, i32* %Dst + ret void +} + +; NILL64 +define void @fun23(i64* %Src0, i64* %Dst, i64* %Dst1) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun23: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK-NEXT: VSTEG +; CHECK: VLEIG {{.*}} -3, 0 +; CHECK: VN +; CHECK: VSTEG {{.*}}noreg, 0 + %i = load i64, i64* %Src0 + store i64 %i, i64* %Dst1 + %i5 = and i64 %i, -3 + store i64 %i5, i64* %Dst + ret void +} + +; Immediate load with VLEIG -1 (Hi16 == -1, Lo16:15 == 0) +define void @fun24(i64* %Src0, i64* %Dst, i64* %Dst1) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun24: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK-NEXT: VSTEG +; CHECK: VLEIG {{.*}} -1, 0 +; CHECK: VLEIH {{.*}} 16, 3 +; CHECK: VN +; CHECK: VSTEG {{.*}}noreg, 0 + %i = load i64, i64* %Src0 + store i64 %i, i64* %Dst1 + %i5 = and i64 %i, -65520 + store i64 %i5, i64* %Dst + ret void +} + +; NIFMux +define void @fun25(i16* %Src, i16* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun25: IsSSA, TracksLiveness +; CHECK: VLLEZH +; CHECK: VLEIF {{.*}} 1, 1 +; CHECK: VN +; CHECK: VSTEH {{.*}}noreg, 3 + %i = load i16, i16* %Src, align 2 + %i2 = and i16 %i, 1 + store i16 %i2, i16* %Dst + ret void +} + +define void @fun26(i32* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun26: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEIH {{.*}} -256, 1 +; CHECK: VLEIH {{.*}} 0, 0 +; CHECK: VN +; CHECK: VSTEF {{.*}}noreg, 0 +bb: + %i = load i32, i32* %Src + br label %bb1 + +bb1: + %i2 = and i32 %i, 65280 + store i32 %i2, i32* %Dst + ret void +} + +; ORK +define void @fun27(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun27: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VO +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = or i32 %Sum, %C + store i32 %Res, i32* %Dst + ret void +} + +; NRK +define void @fun28(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun28: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VN +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = and i32 %Sum, %C + store i32 %Res, i32* %Dst + ret void +} + +; XRK +define void @fun028(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun028: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; 
CHECK: VX +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = xor i32 %Sum, %C + store i32 %Res, i32* %Dst + ret void +} + +; XIFMux +define void @fun29(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun29: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: VLEIF {{.*}}, -1, 0 +; CHECK: VX +; CHECK: VSTEF {{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %C = load volatile i32, i32* %Src2 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = xor i32 %Sum, -1 + store i32 %Res, i32* %Dst + ret void +} + +; NGRK +define void @fun30(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun30: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VN +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %C = load volatile i64, i64* %Src2 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %Res = and i64 %Sum, %C + store i64 %Res, i64* %Dst + ret void +} + +; OGRK +define void @fun31(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun31: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VO +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %C = load volatile i64, i64* %Src2 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %Res = or i64 %Sum, %C + store i64 %Res, i64* %Dst + ret void +} + +; XGRK +define void @fun32(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun32: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VX +; CHECK: VSTEG {{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %C = load volatile i64, i64* %Src2 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %Res = xor i64 %Sum, %C + store i64 %Res, i64* %Dst + ret void +} + +; MHI -> VMEF +define void @fun33(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun33: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 0 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 0 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VAF [[REG0]], [[REG1]] +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VLEIF {{.*}}-3, 0 +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VMEF [[REG2]], [[REG3]] +; CHECK: VSTEF killed [[REG4]]{{.*}}noreg, 1 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = mul i32 %Sum, -3 + store i32 %Res, i32* %Dst + ret void +} + +; MHI -> VMOF +define void @fun34(i64* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun34: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = COPY [[REG0]] +; CHECK: 
[[REG2:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 1 +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VAF [[REG1]], [[REG2]] +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VLEIF {{.*}}-3, 1 +; CHECK: [[REG5:%[0-9]+:vr128bit]] = VMOF [[REG3]], [[REG4]] +; CHECK: VSTEF killed [[REG5]]{{.*}}noreg, 1 + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + + %RHS = load i32, i32* %Src1 + %Sum = add i32 %T0, %RHS + %Res = mul i32 %Sum, -3 + store i32 %Res, i32* %Dst + ret void +} + +; MSFI -> VMEF +define void @fun35(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun35: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 0 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 0 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VAF [[REG0]], [[REG1]] +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VLEIH {{.*}}vr128bit(tied-def 0), -1, 1 +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VLEIH [[REG3]](tied-def 0), 15, 0 +; CHECK: [[REG5:%[0-9]+:vr128bit]] = VMEF [[REG2]], [[REG4]] +; CHECK: VSTEF killed [[REG5]]{{.*}}noreg, 1 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %Res = mul i32 %Sum, 1048575 + store i32 %Res, i32* %Dst + ret void +} + +; MSFI -> VMOF +define void @fun36(i64* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun36: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = COPY [[REG0]] +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 1 +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VAF [[REG1]], [[REG2]] +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VLEIH {{.*}}vr128bit(tied-def 0), -7616, 3 +; CHECK: [[REG5:%[0-9]+:vr128bit]] = VLEIH [[REG4]](tied-def 0), 1, 2 +; CHECK: [[REG6:%[0-9]+:vr128bit]] = VMOF [[REG3]], [[REG5]] +; CHECK: VSTEF killed [[REG6]]{{.*}}noreg, 1 + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + + %RHS = load i32, i32* %Src1 + %Sum = add i32 %T0, %RHS + %Res = mul i32 %Sum, 123456 + store i32 %Res, i32* %Dst + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-03.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-03.ll @@ -0,0 +1,75 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments for register extensions (one unpack only). 
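
The functions below check that each GPR extension is replaced by a single vector unpack once the value is known to live in the leftmost element: sign extensions become VUPHF/VUPHH and zero extensions become VUPLHF/VUPLHH. A rough sketch of the opcode mapping this implies, using the opcode names that appear in the comments and CHECK lines (the mapping function itself is illustrative, not the pass's actual table):

// Illustrative mapping from a GPR register-extension opcode to the vector
// "unpack high" that performs the same widening on element 0.
static unsigned getVectorUnpackOpcode(unsigned ExtOpcode) {
  switch (ExtOpcode) {
  case SystemZ::LGFR:    return SystemZ::VUPHF;  // sext i32 -> i64
  case SystemZ::LLGFR:   return SystemZ::VUPLHF; // zext i32 -> i64
  case SystemZ::LHR:     return SystemZ::VUPHH;  // sext i16 -> i32
  case SystemZ::LLHRMux: return SystemZ::VUPLHH; // zext i16 -> i32
  default:               return 0;               // no single-unpack equivalent
  }
}
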
+ +; LGFR +define void @fun0(i32* %Src1, i32* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG0:%[0-9]+]]:vr128bit = VAF +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VUPHF killed [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %LHS = load i32, i32* %Src1 + %RHS = load i32, i32* %Src2 + %Sum = add i32 %LHS, %RHS + %ext = sext i32 %Sum to i64 + store i64 %ext, i64* %Dst + ret void +} + +; LHR +define void @fun1(i32* %Src, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VUPHH killed [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 1 + %L = load volatile i32, i32* %Src + %T = trunc i32 %L to i16 + %ext = sext i16 %T to i32 + store i32 %ext, i32* %Dst + ret void +} + +; LLGFR +define void @fun2(i32* %Src1, i32* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG0:%[0-9]+]]:vr128bit = VAF +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VUPLHF killed [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %LHS = load i32, i32* %Src1 + %RHS = load i32, i32* %Src2 + %Sum = add i32 %LHS, %RHS + %ext = zext i32 %Sum to i64 + store i64 %ext, i64* %Dst + ret void +} + +; LLHRMux +define void @fun3(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VAF +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}-16, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VAF {{.*}} [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr128bit = VUPLHH killed [[REG1]] +; CHECK-NEXT: VSTEF killed [[REG2]]{{.*}}noreg, 1 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %AddI = add i32 %Sum, -16 + %T = trunc i32 %AddI to i16 + %ext = zext i16 %T to i32 + store i32 %ext, i32* %Dst + ret void +} + Index: llvm/test/CodeGen/SystemZ/domain-reassignment-04.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-04.ll @@ -0,0 +1,221 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments of immediate loads. 
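
The CHECK patterns below all follow one scheme: a constant that fits a signed 16-bit immediate is materialized with a single VLEIG/VLEIF, and a wider constant starts from a cheap base (0 or -1) and patches each differing halfword with one VLEIH (fun14 shows that a fullword-sized piece can instead be patched with VLEIF). A small sketch of that decomposition, under the assumption that the pass bounds the number of patch instructions roughly like this (helper name and shape are illustrative):

// Illustrative only: count how many VLEIH patches a 64-bit constant would
// need on top of a VLEIG of Base, where Base is 0, -1, or a value that
// already fits a signed 16-bit immediate.
static unsigned countVLEIHPatches(uint64_t Imm, uint64_t Base) {
  unsigned Patches = 0;
  for (unsigned HW = 0; HW < 4; ++HW) {          // halfword 0 is the MSB
    uint16_t Want = (Imm  >> (48 - 16 * HW)) & 0xffff;
    uint16_t Have = (Base >> (48 - 16 * HW)) & 0xffff;
    if (Want != Have)
      ++Patches;                                 // one VLEIH per halfword
  }
  return Patches;
}

For example, 32768 differs from a zero base only in halfword 3, giving the single VLEIH seen in fun6, while -32769 differs from an all-ones base only in halfword 3, matching fun9.
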
+ +; 16 bits signed extended to 64 bits: LGHI +define void @fun0(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, 0, 0 +; CHECK-NEXT: VSTEG killed [[REG0]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 0, i64* %tmp1 + ret void +} + +define void @fun1(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, 32767, 0 +; CHECK-NEXT: VSTEG killed [[REG0]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 32767, i64* %tmp1 + ret void +} + +define void @fun2(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, -32768, 0 +; CHECK-NEXT: VSTEG killed [[REG0]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 -32768, i64* %tmp1 + ret void +} + +; 16 bits signed extended to 32 bits: LHIMux +define void @fun3(i32* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}, 0, 0 +; CHECK-NEXT: VSTEF killed [[REG0]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i32* + store i32 0, i32* %tmp1 + ret void +} + +define void @fun4(i32* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}, 32767, 0 +; CHECK-NEXT: VSTEF killed [[REG0]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i32* + store i32 32767, i32* %tmp1 + ret void +} + +define void @fun5(i32* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun5: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}, -32768, 0 +; CHECK-NEXT: VSTEF killed [[REG0]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i32* + store i32 -32768, i32* %tmp1 + ret void +} + +; 32 bits signed extended to 64 bits +; High32=0, Hi16=0, Lo16:b15=1 +define void @fun6(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, 0, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, -32768, 3 +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 32768, i64* %tmp1 + ret void +} + +; High32=0, Hi16=2047, Lo16=0 +define void @fun7(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun7: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, 0, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, 2047, 2 +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, 
%Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 134152192, i64* %tmp1 + ret void +} + +; High32=0, Hi16=2047, Lo16:b15=0 +define void @fun8(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun8: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, 16, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, 2047, 2 +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 134152208, i64* %tmp1 + ret void +} + +; High32=-1, Hi16=-1, Lo16:b15=0 +define void @fun9(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun9: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIG {{.*}}, -1, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, 32767, 3 +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 -32769, i64* %tmp1 + ret void +} + +; High32=-1, Hi16=1, Lo16=-1 +define void @fun10(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun10: IsSSA, TracksLiveness +; CHECK-NOT: Legal closure found +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun10: IsSSA, TracksLiveness + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 -4294836225, i64* %tmp1 + ret void +} + +; High32=-1, Hi16=1, Lo16:b15=1 +define void @fun11(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun11: IsSSA, TracksLiveness +; CHECK-NOT: Legal closure found +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun11: IsSSA, TracksLiveness + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 -4294868992, i64* %tmp1 + ret void +} + +; High32=0, Hi16=1, Lo16:b15=1 +define void @fun12(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun12: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}, 0, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, -32768, 3 +; CHECK: [[REG2:%[0-9]+]]:vr128bit = VLEIH [[REG1]]{{.*}}, 1, 2 +; CHECK-NEXT: VSTEG killed [[REG2]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 98304, i64* %tmp1 + ret void +} + +; High32=-1, Hi16=1, Lo16:b15=0 +define void @fun13(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK: # Machine code for function fun13: IsSSA, TracksLiveness +; CHECK-NOT: Legal closure found +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun13: IsSSA, TracksLiveness + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 -4294873088, i64* %tmp1 + ret void +} + +; 32 bits zero extended to 64 bits +; Hi16=-1 Lo16=-4 +define void @fun14(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun14: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}, 0, 0 +; CHECK: 
[[REG1:%[0-9]+]]:vr128bit = VLEIF [[REG0]]{{.*}}, -4, 1 +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 4294967292, i64* %tmp1 + ret void +} + +; Hi16=-16 Lo16=-4 +define void @fun15(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun15: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIF {{.*}}, 0, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, -4, 3 +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr128bit = VLEIH [[REG1]]{{.*}}, -16, 2 +; CHECK-NEXT: VSTEG killed [[REG2]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i64* + store i64 4293984252, i64* %tmp1 + ret void +} + +; 32 bit immediate +; Hi16 = 16384, Lo16 = 0 +define void @fun16(i64* %Dst, i64 %Base, i64 %Idx) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun16: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEIH{{.*}}, 0, 1 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLEIH [[REG0]]{{.*}}, 16384, 0 +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}} 0,{{.*}}, 0 + %tmp = add nsw i64 %Base, %Idx + %tmp1 = inttoptr i64 %tmp to i32* + store i32 1073741824, i32* %tmp1 + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-05.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-05.ll @@ -0,0 +1,117 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments for logical instructions. 
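
The shift tests below rely on the fact that a GPR shift by an immediate has a direct per-element counterpart, so only the opcode changes and the shift amount carries over; the negations (LCGR/LCR) similarly become VLCG/VLCF. A rough sketch of the shift mapping, using the opcode names from the comments and CHECK lines (the function is illustrative, not the pass's actual table):

// Illustrative mapping from GPR shift-by-immediate opcodes to the vector
// element shifts checked for below; the element size matches the scalar width.
static unsigned getVectorShiftOpcode(unsigned Opcode) {
  switch (Opcode) {
  case SystemZ::SLLK: return SystemZ::VESLF;  // shl  i32
  case SystemZ::SLLG: return SystemZ::VESLG;  // shl  i64
  case SystemZ::SRLK: return SystemZ::VESRLF; // lshr i32
  case SystemZ::SRLG: return SystemZ::VESRLG; // lshr i64
  case SystemZ::SRAK: return SystemZ::VESRAF; // ashr i32
  case SystemZ::SRAG: return SystemZ::VESRAG; // ashr i64
  default:            return 0;
  }
}
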
+ +; SLLK +define void @fun0(i32* %Src0, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VESLF killed [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 0 + %Val = load i32, i32* %Src0 + %Res = shl i32 %Val, 2 + store i32 %Res, i32* %Dst + ret void +} + +; SLLG +define void @fun1(i64* %Src0, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VESLG killed [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %Val = load i64, i64* %Src0 + %Res = shl i64 %Val, 2 + store i64 %Res, i64* %Dst + ret void +} + +; SRLK +define void @fun2(i32* %Src0, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VESRLF killed [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 0 + %Val = load i32, i32* %Src0 + %Res = lshr i32 %Val, 2 + store i32 %Res, i32* %Dst + ret void +} + +; SRLG +define void @fun3(i64* %Src0, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VESRLG killed [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %Val = load i64, i64* %Src0 + %Res = lshr i64 %Val, 2 + store i64 %Res, i64* %Dst + ret void +} + +; SRAK +define void @fun4(i32* %Src0, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VESRAF killed [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 0 + %Val = load i32, i32* %Src0 + %Res = ashr i32 %Val, 2 + store i32 %Res, i32* %Dst + ret void +} + +; SRAG +define void @fun5(i64* %Src0, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun5: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VESRAG killed [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %Val = load i64, i64* %Src0 + %Res = ashr i64 %Val, 2 + store i64 %Res, i64* %Dst + ret void +} + +; LCGR +define void @fun6(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VAG +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLCG killed [[REG0]] +; CHECK-NEXT: VSTEG killed [[REG1]]{{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Val = add i64 %LHS, %RHS + %Res = sub i64 0, %Val + store i64 %Res, i64* %Dst + ret void +} + +; LCR +define void @fun7(i32* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK: # Machine code for function fun7: IsSSA, TracksLiveness +; CHECK: VLEF {{.*}}noreg, 0 +; CHECK: VLEF 
{{.*}}noreg, 0 +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VAF +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VLCF killed [[REG0]] +; CHECK-NEXT: VSTEF killed [[REG1]]{{.*}}noreg, 0 + %LHS = load i32, i32* %Src0 + %RHS = load i32, i32* %Src1 + %Val = add i32 %LHS, %RHS + %Res = sub i32 0, %Val + store i32 %Res, i32* %Dst + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-06.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-06.ll @@ -0,0 +1,158 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s --check-prefixes=CHECK,Z14 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s --check-prefixes=CHECK,Z15 +; REQUIRES: asserts +; +; Test domain reassignments for fp <-> int conversions. + +; CDGBR +define void @fun0(i64* %Src0, double* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr64bit = COPY [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr64bit = WCDGB [[REG1]] +; CHECK-NEXT: [[REG3:%[0-9]+]]:fp64bit = COPY [[REG2]] +; CHECK-NEXT: VST64 killed [[REG3]] + %Val = load i64, i64* %Src0 + %Res = sitofp i64 %Val to double + store double %Res, double* %Dst + ret void +} + +; CEFBR +define void @fun1(i32* %Src0, float* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness + +; Z14: Unable to reassign: Extraction{{.*}}CEFBR + + +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness + +; Z15: [[REG0:%[0-9]+]]:vr128bit = VLEF{{.*}}noreg, 0 +; Z15-NEXT: [[REG1:%[0-9]+]]:vr32bit = COPY [[REG0]] +; Z15-NEXT: [[REG2:%[0-9]+]]:vr32bit = WCEFB [[REG1]] +; Z15-NEXT: [[REG3:%[0-9]+]]:fp32bit = COPY [[REG2]] +; Z15-NEXT: VST32 killed [[REG3]] + %Val = load i32, i32* %Src0 + %Res = sitofp i32 %Val to float + store float %Res, float* %Dst + ret void +} + +; CDLGBR +define void @fun2(i64* %Src0, double* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr64bit = COPY [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr64bit = WCDLGB [[REG1]] +; CHECK-NEXT: [[REG3:%[0-9]+]]:fp64bit = COPY [[REG2]] +; CHECK-NEXT: VST64 killed [[REG3]] + %Val = load i64, i64* %Src0 + %Res = uitofp i64 %Val to double + store double %Res, double* %Dst + ret void +} + +; CELFBR +define void @fun3(i32* %Src0, float* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness + +; Z14: Unable to reassign: Extraction{{.*}}CELFBR + +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness + +; Z15: [[REG0:%[0-9]+]]:vr128bit = VLEF {{.*}}noreg, 0 +; Z15-NEXT: [[REG1:%[0-9]+]]:vr32bit = COPY [[REG0]] +; Z15-NEXT: [[REG2:%[0-9]+]]:vr32bit = WCELFB [[REG1]] +; Z15-NEXT: [[REG3:%[0-9]+]]:fp32bit = COPY [[REG2]] +; Z15-NEXT: VST32 killed [[REG3]] + %Val = load 
i32, i32* %Src0 + %Res = uitofp i32 %Val to float + store float %Res, float* %Dst + ret void +} + +; CGDBR +define void @fun4(double* %Src0, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:fp64bit = VL64 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr64bit = COPY killed [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr64bit = WCGDB [[REG1]]:vr64bit, 0, 5 +; CHECK-NEXT: [[REG3:%[0-9]+]]:vr64bit = IMPLICIT_DEF +; CHECK-NEXT: [[REG4:%[0-9]+]]:vr128bit = INSERT_SUBREG [[REG3]]:vr64bit(tied-def 0), [[REG2]] +; CHECK-NEXT: VSTEG killed [[REG4]]{{.*}}noreg, 0 + %Val = load double, double* %Src0 + %Res = fptosi double %Val to i64 + store i64 %Res, i64* %Dst + ret void +} + +; CFEBR +define void @fun5(float* %Src0, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness + +; Z14-NOT: Legal closure found + +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness + +; Z15: [[REG0:%[0-9]+]]:fp32bit = VL32 +; Z15-NEXT: [[REG1:%[0-9]+]]:vr32bit = COPY killed [[REG0]] +; Z15-NEXT: [[REG2:%[0-9]+]]:vr32bit = WCFEB [[REG1]]:vr32bit, 0, 5 +; Z15-NEXT: [[REG3:%[0-9]+]]:vr32bit = IMPLICIT_DEF +; Z15-NEXT: [[REG4:%[0-9]+]]:vr128bit = INSERT_SUBREG [[REG3]]:vr32bit(tied-def 0), [[REG2]] +; Z15-NEXT: VSTEF killed [[REG4]]{{.*}}noreg, 0 + + %Val = load float, float* %Src0 + %Res = fptosi float %Val to i32 + store i32 %Res, i32* %Dst + ret void +} + +; CLGDBR +define void @fun6(double* %Src0, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+]]:fp64bit = VL64 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr64bit = COPY killed [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr64bit = WCLGDB [[REG1]]:vr64bit, 0, 5 +; CHECK-NEXT: [[REG3:%[0-9]+]]:vr64bit = IMPLICIT_DEF +; CHECK-NEXT: [[REG4:%[0-9]+]]:vr128bit = INSERT_SUBREG [[REG3]]:vr64bit(tied-def 0), [[REG2]] +; CHECK-NEXT: VSTEG killed [[REG4]]{{.*}}noreg, 0 + %Val = load double, double* %Src0 + %Res = fptoui double %Val to i64 + store i64 %Res, i64* %Dst + ret void +} + +; CLFEBR +define void @fun7(float* %Src0, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness + +; Z14-NOT: Legal closure found + +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness + +; Z15: [[REG0:%[0-9]+]]:fp32bit = VL32 +; Z15-NEXT: [[REG1:%[0-9]+]]:vr32bit = COPY killed [[REG0]] +; Z15-NEXT: [[REG2:%[0-9]+]]:vr32bit = WCLFEB [[REG1]]:vr32bit, 0, 5 +; Z15-NEXT: [[REG3:%[0-9]+]]:vr32bit = IMPLICIT_DEF +; Z15-NEXT: [[REG4:%[0-9]+]]:vr128bit = INSERT_SUBREG [[REG3]]:vr32bit(tied-def 0), [[REG2]] +; Z15-NEXT: VSTEF killed [[REG4]]{{.*}}noreg, 0 + + %Val = load float, float* %Src0 + %Res = fptoui float %Val to i32 + store i32 %Res, i32* %Dst + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-07.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-07.ll @@ -0,0 +1,711 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; RUN: llc < %s 
-mtriple=s390x-linux-gnu -mcpu=z15 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s --check-prefix=Z15 + +; REQUIRES: asserts +; +; Test domain reassignments that have special vector lane requirements. + +;; LGF in G1 / LHMux in F1/F3 missing + +; Truncate i64 -> i32 puts result in lane 1. +define void @fun0(i64* %Src0, i64* %Src1, i32* %Src2, i32* %Dst, i16* %Dst1, i8* %Dst2) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VAG +; CHECK: VLEF {{.*}}noreg, 1 +; CHECK-NEXT: [[REG:%[0-9]+]]:vr128bit = VSF +; CHECK-NEXT: VSTEF [[REG]]{{.*}}noreg, 1 +; CHECK-NEXT: VSTEH [[REG]]{{.*}}noreg, 3 +; CHECK-NEXT: VSTEB [[REG]]{{.*}}noreg, 7 + %LHS = load i64, i64* %Src0 ; G0 + %RHS = load i64, i64* %Src1 + %Sum = add i64 %LHS, %RHS + %T = trunc i64 %Sum to i32 ; F1 + %RHS2 = load i32, i32* %Src2 + %Res = sub i32 %T, %RHS2 + store i32 %Res, i32* %Dst + %T2 = trunc i32 %Res to i16 + store i16 %T2, i16* %Dst1 + %T3 = trunc i16 %T2 to i8 + store i8 %T3, i8* %Dst2 + ret void +} + +; i32 sign extend puts result in lane 1. +define void @fun1(i64* %Src0, i64* %Src1, i32* %Src2, i64* %Dst, i64* %Dst2, + i32* %Dst3, i16* %Dst4, i8* %Dst5) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VUPHF +; CHECK-NEXT: VSTEG [[REG0]]{{.*}}noreg, 1 +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK: VSG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK: VLEF {{.*}}noreg, 3 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = VSF +; CHECK: VSTEF [[REG1]]{{.*}}noreg, 3 +; CHECK: VSTEH [[REG1]]{{.*}}noreg, 7 +; CHECK: VSTEB [[REG1]]{{.*}}noreg, 15 + + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 + %S = sext i32 %T to i64 ; G1 + store i64 %S, i64* %Dst + + %L1 = load i64, i64* %Src1 ; G1 + %D = sub i64 %S, %L1 + store i64 %D, i64* %Dst2 + + %T2 = trunc i64 %L1 to i32 ; F3 + %L2 = load i32, i32* %Src2 + %D2 = sub i32 %T2, %L2 + store i32 %D2, i32* %Dst3 + %T3 = trunc i32 %D2 to i16 + store i16 %T3, i16* %Dst4 + %T4 = trunc i16 %T3 to i8 + store i8 %T4, i8* %Dst5 + + ret void +} + +; Extensions in G1 needs a Vector Unpack Low. 
+define void @fun2(i64* %Src0, i64* %Src1, i32* %Src2, i64* %Dst, i64* %Dst2, + i64* %Dst3, i32* %Dst4, i32* %Dst5) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK: [[REG0:%[0-9]+]]:vr128bit = VLEG {{.*}}noreg, 1 +; CHECK-NEXT: VSG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = COPY [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr128bit = VUPLF [[REG1]] +; CHECK-NEXT: VSTEG killed [[REG2]]{{.*}}noreg, 1 +; CHECK-NEXT: [[REG3:%[0-9]+]]:vr128bit = VUPLLF [[REG1]] +; CHECK-NEXT: VSTEG killed [[REG3]]{{.*}}noreg, 1 +; CHECK-NEXT: [[REG4:%[0-9]+]]:vr128bit = VUPLHW [[REG1]] +; CHECK-NEXT: VSTEF killed [[REG4]]{{.*}}noreg, 3 +; CHECK-NEXT: [[REG5:%[0-9]+]]:vr128bit = VUPLLH [[REG1]] +; CHECK-NEXT: VSTEF killed [[REG5]]{{.*}}noreg, 3 + + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 + %S = sext i32 %T to i64 ; G1 + + %L1 = load volatile i64, i64* %Src1 ; G1 + %D = sub i64 %S, %L1 + store i64 %D, i64* %Dst + + %T2 = trunc i64 %L1 to i32 ; F3 + %S2 = sext i32 %T2 to i64 + store i64 %S2, i64* %Dst2 ; G1 + + %S3 = zext i32 %T2 to i64 + store i64 %S3, i64* %Dst3 ; G1 + + %T4 = trunc i32 %T2 to i16 ; F3 + %S4 = sext i16 %T4 to i32 + store i32 %S4, i32* %Dst4 + + %S5 = zext i16 %T4 to i32 ; F3 + store i32 %S5, i32* %Dst5 + + ret void +} + +; Vector Unpack High. +; 16 -> 32 bit extensions from G0 (F1) ends up in F3 +define void @fun3(i64* %Src1, i64* %Src2, i32* %Dst, i32* %Dst2) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG0:%[0-9]+]]:vr128bit = VAG +; CHECK-NEXT: [[REG1:%[0-9]+]]:vr128bit = COPY [[REG0]] +; CHECK-NEXT: [[REG2:%[0-9]+]]:vr128bit = VUPLHH [[REG1]] +; CHECK-NEXT: VSTEF killed [[REG2]]{{.*}}noreg, 3 +; CHECK-NEXT: [[REG3:%[0-9]+]]:vr128bit = VUPHH [[REG1]] +; CHECK-NEXT: VSTEF killed [[REG3]]{{.*}}noreg, 3 + %LHS = load i64, i64* %Src1 + %RHS = load i64, i64* %Src2 + %Sum = add i64 %LHS, %RHS ; G0 + %T = trunc i64 %Sum to i16 + + %ext = zext i16 %T to i32 + store i32 %ext, i32* %Dst ; F3 + + %S2 = sext i16 %T to i32 + store i32 %S2, i32* %Dst2 ; F3 + + ret void +} + +; Load immediate in G1 / F3 +define void @fun4(i64* %Src0, i64* %Dst, i32* %Dst2) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK: VLEIG {{.*}}, -3, 1 +; CHECK: VAG +; CHECK: VSTEG{{.*}}noreg, 1 +; CHECK: VLEIF {{.*}}, -3, 3 +; CHECK: VAF +; CHECK: VSTEF{{.*}}noreg, 3 + + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 + %S = sext i32 %T to i64 ; G1 + + %Res = add i64 %S, -3 + store i64 %Res, i64* %Dst + + %T2 = trunc i64 %Res to i32 + %Res2 = add i32 %T2, -3 + store i32 %Res2, i32* %Dst2 + + ret void +} + +; i64 arithmetic in lane G1. 
+define void @fun5(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Src3, i64* %Src4,
+ i64* %Src5, i64* %Dst) {
+; CHECK: ***** Machine Function after Domain Reassignment *****
+; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness
+; CHECK: VLEG {{.*}}noreg, 0
+; CHECK: VUPHF
+; CHECK: VLEG {{.*}}noreg, 1
+; CHECK-NEXT: VAG
+; CHECK: VLEG {{.*}}noreg, 1
+; CHECK-NEXT: VSG
+; CHECK: VLEG {{.*}}noreg, 1
+; CHECK-NEXT: VN
+; CHECK: VLEG {{.*}}noreg, 1
+; CHECK-NEXT: VO
+; CHECK: VLEG {{.*}}noreg, 1
+; CHECK-NEXT: VX
+; CHECK-NEXT: VSTEG {{.*}}noreg, 1
+
+ %L0 = load volatile i64, i64* %Src0 ; G0
+ %T = trunc i64 %L0 to i32
+ %S = sext i32 %T to i64 ; G1
+
+ %L1 = load i64, i64* %Src1
+ %R1 = add i64 %S, %L1
+
+ %L2 = load i64, i64* %Src2
+ %R2 = sub i64 %R1, %L2
+
+ %L3 = load i64, i64* %Src3
+ %R3 = and i64 %R2, %L3
+
+ %L4 = load i64, i64* %Src4
+ %R4 = or i64 %R3, %L4
+
+ %L5 = load i64, i64* %Src5
+ %R5 = xor i64 %R4, %L5
+
+ store i64 %R5, i64* %Dst
+
+ ret void
+}
+
+; i32 arithmetic in lane F1
+define void @fun6(i64* %Src0, i32* %Src1, i32* %Src2, i32* %Src3, i32* %Src4,
+ i32* %Src5, i32* %Dst) {
+; CHECK: ***** Machine Function after Domain Reassignment *****
+; CHECK-NEXT: # Machine code for function fun6: IsSSA, TracksLiveness
+; CHECK: VLEG {{.*}}noreg, 0
+; CHECK: VLEF {{.*}}noreg, 1
+; CHECK-NEXT: VAF
+; CHECK: VLEF {{.*}}noreg, 1
+; CHECK-NEXT: VSF
+; CHECK: VLEF {{.*}}noreg, 1
+; CHECK-NEXT: VN
+; CHECK: VLEF {{.*}}noreg, 1
+; CHECK-NEXT: VO
+; CHECK: VLEF {{.*}}noreg, 1
+; CHECK-NEXT: VX
+; CHECK-NEXT: VSTEF {{.*}}noreg, 1
+
+ %L0 = load volatile i64, i64* %Src0 ; G0
+ %T = trunc i64 %L0 to i32 ; F1
+
+ %L1 = load i32, i32* %Src1
+ %R1 = add i32 %T, %L1
+
+ %L2 = load i32, i32* %Src2
+ %R2 = sub i32 %R1, %L2
+
+ %L3 = load i32, i32* %Src3
+ %R3 = and i32 %R2, %L3
+
+ %L4 = load i32, i32* %Src4
+ %R4 = or i32 %R3, %L4
+
+ %L5 = load i32, i32* %Src5
+ %R5 = xor i32 %R4, %L5
+
+ store i32 %R5, i32* %Dst
+
+ ret void
+}
+
+; i32 arithmetic in lane F3
+define void @fun7(i64* %Src0, i64* %Src1, i32* %Src2, i32* %Src3, i32* %Src4,
+ i32* %Src5, i32* %Src6, i64* %Dst, i32* %Dst2) {
+; CHECK: ***** Machine Function after Domain Reassignment *****
+; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness
+; CHECK: VLEG {{.*}}noreg, 0
+; CHECK: VUPHF
+; CHECK: VLEG {{.*}}noreg, 1
+; CHECK: VSG
+; CHECK-NEXT: VSTEG {{.*}}noreg, 1
+
+; CHECK: VLEF {{.*}}noreg, 3
+; CHECK-NEXT: VAF
+; CHECK: VLEF {{.*}}noreg, 3
+; CHECK-NEXT: VSF
+; CHECK: VLEF {{.*}}noreg, 3
+; CHECK-NEXT: VN
+; CHECK: VLEF {{.*}}noreg, 3
+; CHECK-NEXT: VO
+; CHECK: VLEF {{.*}}noreg, 3
+; CHECK-NEXT: VX
+; CHECK-NEXT: VSTEF {{.*}}noreg, 3
+
+ %L0 = load volatile i64, i64* %Src0 ; G0
+ %T0 = trunc i64 %L0 to i32
+ %S0 = sext i32 %T0 to i64 ; G1
+
+ %L = load i64, i64* %Src1 ; G1
+ %D = sub i64 %S0, %L
+ store i64 %D, i64* %Dst
+
+ %T1 = trunc i64 %L to i32 ; F3
+ %L1 = load i32, i32* %Src2
+ %R1 = add i32 %T1, %L1
+
+ %L2 = load i32, i32* %Src3
+ %R2 = sub i32 %R1, %L2
+
+ %L3 = load i32, i32* %Src4
+ %R3 = and i32 %R2, %L3
+
+ %L4 = load i32, i32* %Src5
+ %R4 = or i32 %R3, %L4
+
+ %L5 = load i32, i32* %Src6
+ %R5 = xor i32 %R4, %L5
+
+ store i32 %R5, i32* %Dst2
+
+ ret void
+}
+
+; AGRK and SGRK in G1.
+define void @fun8(i64* %Src0, i64* %Src1, i64* %Src2, + i64* %Dst, i64* %Dst1, i64* %Dst2, i64* %Dst3) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun8: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VSG +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VAG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 + + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 + %S = sext i32 %T to i64 ; G1 + store i64 %S, i64* %Dst + + %L1 = load i64, i64* %Src1 + store i64 %L1, i64* %Dst1 + %S1 = sub i64 %S, %L1 + + %L2 = load i64, i64* %Src2 + store i64 %L2, i64* %Dst2 + %S2 = add i64 %S1, %L2 + + store i64 %S2, i64* %Dst3 + ret void +} + +; ARK and SRK in F1. +define void @fun9(i64* %Src0, i32* %Src1, i32* %Src2, + i32* %Dst, i32* %Dst1, i32* %Dst2, i32* %Dst3) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun9: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VSTEF {{.*}}noreg, 1 +; CHECK: VLEF {{.*}}noreg, 1 +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 +; CHECK-NEXT: VSF +; CHECK: VLEF {{.*}}noreg, 1 +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 +; CHECK-NEXT: VAF +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 + + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 ; F1 + store i32 %T, i32* %Dst + + %L1 = load i32, i32* %Src1 + store i32 %L1, i32* %Dst1 + %S1 = sub i32 %T, %L1 + + %L2 = load i32, i32* %Src2 + store i32 %L2, i32* %Dst2 + %S2 = add i32 %S1, %L2 + + store i32 %S2, i32* %Dst3 + ret void +} + +; ARK and SRK in F3. +define void @fun10(i64* %Src0, i64* %Src00, i32* %Src1, i32* %Src2, + i64* %Dst, i64* %Dst00, i64* %Dst01, i32* %Dst1, i32* %Dst2, i32* %Dst3) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun10: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VSG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 + +; CHECK: VLEF {{.*}}noreg, 3 +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 +; CHECK-NEXT: VSF +; CHECK: VLEF {{.*}}noreg, 3 +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 +; CHECK-NEXT: VAF +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 + + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 + %S = sext i32 %T to i64 ; G1 + store i64 %S, i64* %Dst + + %L00 = load volatile i64, i64* %Src00 ; G1 + store i64 %L00, i64* %Dst00 + %D = sub i64 %S, %L00 + store i64 %D, i64* %Dst01 + + %T1 = trunc i64 %D to i32 ; F3 + %L1 = load i32, i32* %Src1 + store i32 %L1, i32* %Dst1 + %S1 = sub i32 %T1, %L1 + + %L2 = load i32, i32* %Src2 + store i32 %L2, i32* %Dst2 + %S2 = add i32 %S1, %L2 + + store i32 %S2, i32* %Dst3 + ret void +} + +; AGHIK and OILL64 in G1 +define void @fun11(i64* %Src0, i64* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun11: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK: VSG +; CHECK: VLEIG {{.*}}-16, 1 +; CHECK-NEXT: VAG +; CHECK: VLEIG {{.*}}1, 1 +; CHECK-NEXT: VO +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 + %L0 = load volatile i64, i64* %Src0 ; G0 + %T = trunc i64 %L0 to i32 + %LHS = sext i32 %T to i64 ; G1 + %RHS = load i64, i64* %Src1 + %Sum = sub i64 %LHS, %RHS + %S2 = 
add i64 %Sum, -16 + %Res = or i64 %S2, 1 + store i64 %Res, i64* %Dst + ret void +} + +; AHIMuxK and OILMux in F1 +define void @fun12(i64* %Src0, i32* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun12: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VLEF {{.*}}noreg, 1 +; CHECK-NEXT: VAF +; CHECK: VLEIF {{.*}}-16, 1 +; CHECK-NEXT: VAF +; CHECK: VLEIF {{.*}}1, 1 +; CHECK-NEXT: VO +; CHECK: VSTEF {{.*}}noreg, 1 + %L0 = load volatile i64, i64* %Src0 + %LHS = trunc i64 %L0 to i32 ; F1 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %S2 = add i32 %Sum, -16 + %Res = or i32 %S2, 1 + store i32 %Res, i32* %Dst + ret void +} + +; AHIMuxK and OILMux in F3 +define void @fun13(i64* %Src0, i64* %Src00, i32* %Src1, + i64* %Dst, i64* %Dst00, i64* %Dst01, i32* %Dst1) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun13: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK-NEXT: VSTEG{{.*}}noreg, 1 +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK-NEXT: VSTEG{{.*}}noreg, 1 +; CHECK-NEXT: VSG +; CHECK-NEXT: VSTEG{{.*}}noreg, 1 +; CHECK: VLEF {{.*}}noreg, 3 +; CHECK-NEXT: VAF +; CHECK: VLEIF {{.*}}-16, 3 +; CHECK-NEXT: VAF +; CHECK: VLEIF {{.*}}1, 3 +; CHECK-NEXT: VO +; CHECK: VSTEF {{.*}}noreg, 3 + %L0 = load volatile i64, i64* %Src0 + %T = trunc i64 %L0 to i32 + %S = sext i32 %T to i64 ; G1 + store i64 %S, i64* %Dst + + %L00 = load volatile i64, i64* %Src00 ; G1 + store i64 %L00, i64* %Dst00 + %D = sub i64 %S, %L00 + store i64 %D, i64* %Dst01 + + %LHS = trunc i64 %D to i32 ; F3 + %RHS = load i32, i32* %Src1 + %Sum = add i32 %LHS, %RHS + %S2 = add i32 %Sum, -16 + %Res = or i32 %S2, 1 + store i32 %Res, i32* %Dst1 + ret void +} + +; Logical instructions in G1. +define void @fun14(i64* %Src0, i64* %Dst, i64* %Dst1, i64* %Dst2, i64* %Dst3) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun14: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK-NEXT: VESLG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VESRLG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VESRAG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VLCG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 + %L = load volatile i64, i64* %Src0 + %T = trunc i64 %L to i32 + %S = sext i32 %T to i64 ; G1 + %R0 = shl i64 %S, 2 + store i64 %R0, i64* %Dst + %R1 = lshr i64 %S, 2 + store i64 %R1, i64* %Dst1 + %R2 = ashr i64 %S, 2 + store i64 %R2, i64* %Dst2 + %R3 = sub i64 0, %R2 + store i64 %R3, i64* %Dst3 + ret void +} + +; Logical instructions in F1. +define void @fun15(i64* %Src0, i32* %Dst, i32* %Dst1, i32* %Dst2, i32* %Dst3) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun15: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VESLF +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 +; CHECK-NEXT: VESRLF +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 +; CHECK-NEXT: VESRAF +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 +; CHECK-NEXT: VLCF +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 + %L = load volatile i64, i64* %Src0 + %T = trunc i64 %L to i32 + + %R0 = shl i32 %T, 2 + store i32 %R0, i32* %Dst + %R1 = lshr i32 %T, 2 + store i32 %R1, i32* %Dst1 + %R2 = ashr i32 %T, 2 + store i32 %R2, i32* %Dst2 + %R3 = sub i32 0, %R2 + store i32 %R3, i32* %Dst3 + ret void +} + +; Logical instructions in F1. 
+define void @fun16(i64* %Src0, i64* %Src00, i32* %Src1, + i64* %Dst0, i64* %Dst00, i64* %Dst01, + i32* %Dst, i32* %Dst1, i32* %Dst2, i32* %Dst3) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun16: IsSSA, TracksLiveness + +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: VUPHF +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK: VLEG {{.*}}noreg, 1 +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 +; CHECK-NEXT: VSG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 + +; CHECK: VESLF +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 +; CHECK-NEXT: VESRLF +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 +; CHECK-NEXT: VESRAF +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 +; CHECK-NEXT: VLCF +; CHECK-NEXT: VSTEF {{.*}}noreg, 3 + + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + %S = sext i32 %T0 to i64 ; G1 + store i64 %S, i64* %Dst0 + + %L00 = load volatile i64, i64* %Src00 ; G1 + store i64 %L00, i64* %Dst00 + %D = sub i64 %S, %L00 + store i64 %D, i64* %Dst01 + + %T = trunc i64 %D to i32 ; F3 + %R0 = shl i32 %T, 2 + store i32 %R0, i32* %Dst + %R1 = lshr i32 %T, 2 + store i32 %R1, i32* %Dst1 + %R2 = ashr i32 %T, 2 + store i32 %R2, i32* %Dst2 + %R3 = sub i32 0, %R2 + store i32 %R3, i32* %Dst3 + ret void +} + +; MS; MSRKC +define void @fun17(i32* %Src0, i64* %Src1, i32* %Src2, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun17: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 1 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = COPY [[REG1]] +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VMOF killed [[REG0]], killed [[REG2]] +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VLEF {{.*}}noreg, 1 +; CHECK: [[REG5:%[0-9]+:vr128bit]] = VMOF [[REG3]], [[REG4]] +; CHECK: VSTEF killed [[REG5]]{{.*}}noreg, 1 + %LHS = load i32, i32* %Src0 + %L = load volatile i64, i64* %Src1 + %RHS = trunc i64 %L to i32 + %C = load i32, i32* %Src2 + %Prd = mul i32 %LHS, %RHS + %Res = mul i32 %Prd, %C + store i32 %Res, i32* %Dst + ret void +} + +;;; Some tests with conflicting lanes + +; MSRKC +define void @fun18(i64* %Src0, double* %Src2, i64* %Dst0, i32* %Dst3) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun18: IsSSA, TracksLiveness +; CHECK: No lanes: + %L = load volatile i64, i64* %Src0 + %T = trunc i64 %L to i32 + %S = sext i32 %T to i64 + %Sh = shl i64 %S, 2 + store i64 %Sh, i64* %Dst0 + + %T1 = trunc i64 %Sh to i32 ; F3 + + %L2 = load volatile double, double* %Src2 + %C2 = fptosi double %L2 to i64 ; G0 + %T2 = trunc i64 %C2 to i32 ; F1 + + %Res3 = mul i32 %T1, %T2 + store i32 %Res3, i32* %Dst3 + ret void +} + +; VLEZ +define void @fun19(i64* %Src0, i8* %Src1, i64* %Dst, i64* %Dst1, i64* %Dst2, i64* %Dst3) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun19: IsSSA, TracksLiveness +; CHECK: No lanes: + %L = load volatile i64, i64* %Src0 + %T = trunc i64 %L to i32 + %S = sext i32 %T to i64 ; G1 + %Sh = shl i64 %S, 2 + + %L1 = load i8, i8* %Src1 + %Z1 = zext i8 %L1 to i64 ; G0 + + %Sum = add i64 %Sh, %Z1 ; conflict + store i64 %Sum, i64* %Dst + + ret void +} + +; CDGBR +define void @fun20(i64* %Src, double* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun20: IsSSA, TracksLiveness +; CHECK: No lanes: + %L = load volatile i64, i64* %Src + %T = trunc i64 %L to i32 + %S = sext i32 %T to i64 ; G1 
+ + %Res = sitofp i64 %S to double + store double %Res, double* %Dst + ret void +} + +; CGDBR +define void @fun21(i64* %Src, double* %Src1, i64* %Dst, i64* %Dst2) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun21: IsSSA, TracksLiveness +; CHECK: No lanes: + %L = load volatile i64, i64* %Src + %T = trunc i64 %L to i32 + %S = sext i32 %T to i64 ; G1 + %Sh = shl i64 %S, 2 + + %D = load double, double* %Src1 + %C = fptosi double %D to i64 ; G0 + + %Res = add i64 %Sh, %C ; conflict + store i64 %Res, i64* %Dst + ret void +} + +; CEFBR +define void @fun22(i64* %Src, float* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun22: IsSSA, TracksLiveness +; Z15: No lanes: + %L = load volatile i64, i64* %Src + %T = trunc i64 %L to i32 + + %Res = sitofp i32 %T to float + store float %Res, float* %Dst + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-08.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-08.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments involving PHI nodes. + +define void @fun0(i64* %Dst, i64* %Src0, i64* %Src1, i64 %Val0, i64 %Val1) { +; CHECK-LABEL: bb.4.join: +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK-LABEL: bb.0 (%ir-block.0): +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEIG {{.*}}, 0, 0 + +; CHECK-LABEL: bb.2.bb1: +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 + +; CHECK-LABEL: bb.3.bb2: +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 + +; CHECK-LABEL: bb.4.join: +; CHECK: [[REG3:%[0-9]+:vr128bit]] = PHI [[REG0]], %bb.0, [[REG2]], %bb.3, [[REG1]], %bb.2 +; CHECK-NEXT: VSTEG [[REG3]]{{.*}}noreg, 0 +; CHECK-NEXT: Return + + %Cmp0 = icmp eq i64 %Val0, 0 + br i1 %Cmp0, label %bb0, label %bb3 + +bb0: + %Cmp1 = icmp eq i64 %Val1, 0 + br i1 %Cmp1, label %bb1, label %bb2 + +bb1: + %L0 = load i64, i64* %Src0 + br label %join + +bb2: + %L1 = load i64, i64* %Src1 + br label %join + +bb3: + br label %join + +join: + %Res = phi i64 [%L0, %bb1], [%L1, %bb2], [0, %bb3] + store i64 %Res, i64* %Dst + ret void +} + +; The unpack requires all instructions to use lane 1. 
+define void @fun1(i64* %Dst, i64* %Src0, i64* %Src1, i64 %Val0, i64 %Val1) { +; CHECK-LABEL: bb.4.join: +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK-LABEL: bb.0 (%ir-block.0): +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEIG {{.*}}, 0, 1 + +; CHECK-LABEL: bb.2.bb1: +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK-NEXT: COPY +; CHECK-NEXT: [[REG1:%[0-9]+:vr128bit]] = VUPHF + +; CHECK-LABEL: bb.3.bb2: +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 1 + +; CHECK-LABEL: bb.4.join: +; CHECK: [[REG3:%[0-9]+:vr128bit]] = PHI [[REG0]], %bb.0, [[REG2]], %bb.3, [[REG1]], %bb.2 +; CHECK-NEXT: VSTEG [[REG3]]{{.*}}noreg, 1 +; CHECK-NEXT: Return + + %Cmp0 = icmp eq i64 %Val0, 0 + br i1 %Cmp0, label %bb0, label %bb3 + +bb0: + %Cmp1 = icmp eq i64 %Val1, 0 + br i1 %Cmp1, label %bb1, label %bb2 + +bb1: + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + %S0 = sext i32 %T0 to i64 ; G1 + br label %join + +bb2: + %L1 = load i64, i64* %Src1 + br label %join + +bb3: + br label %join + +join: + %Res = phi i64 [%S0, %bb1], [%L1, %bb2], [0, %bb3] + store i64 %Res, i64* %Dst + ret void +} + +; Conflicting lanes +define void @fun2(i64* %Dst, i64* %Src0, double* %Src1, i64 %Val0, i64 %Val1) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: No lanes: {{.*}} COPY +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness + + %Cmp0 = icmp eq i64 %Val0, 0 + br i1 %Cmp0, label %bb0, label %bb3 + +bb0: + %Cmp1 = icmp eq i64 %Val1, 0 + br i1 %Cmp1, label %bb1, label %bb2 + +bb1: + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + %S0 = sext i32 %T0 to i64 ; G1 + br label %join + +bb2: + %L1 = load double, double* %Src1 + %I1 = fptosi double %L1 to i64 ; G0 + br label %join + +bb3: + br label %join + +join: + %Res = phi i64 [%S0, %bb1], [%I1, %bb2], [0, %bb3] + store i64 %Res, i64* %Dst + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-09.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-09.ll @@ -0,0 +1,151 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments involving VLGV instructions. 
+ +define void @fun0(<2 x i64>* %Src0, <2 x i64>* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: VL +; CHECK-NEXT: COPY +; CHECK-NEXT: VL +; CHECK-NEXT: COPY +; CHECK-NEXT: VAG +; CHECK-NEXT: VSTEG {{.*}}noreg, 0 + %V0 = load volatile <2 x i64>, <2 x i64>* %Src0 + %EltA = extractelement <2 x i64> %V0, i32 0 + %V1 = load volatile <2 x i64>, <2 x i64>* %Src1 + %EltB = extractelement <2 x i64> %V1, i32 0 + %Res = add i64 %EltA, %EltB + store i64 %Res, i64* %Dst + ret void +} + +define void @fun1(<2 x i64>* %Src0, <2 x i64>* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: VL +; CHECK-NEXT: COPY +; CHECK-NEXT: VL +; CHECK-NEXT: COPY +; CHECK-NEXT: VAG +; CHECK-NEXT: VSTEG {{.*}}noreg, 1 + %V0 = load volatile <2 x i64>, <2 x i64>* %Src0 + %EltA = extractelement <2 x i64> %V0, i32 1 + %V1 = load volatile <2 x i64>, <2 x i64>* %Src1 + %EltB = extractelement <2 x i64> %V1, i32 1 + %Res = add i64 %EltA, %EltB + store i64 %Res, i64* %Dst + ret void +} + +define void @fun2(<2 x i64>* %Src0, <2 x i64>* %Src1, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: No lanes: +; CHECK-NEXT: --- Invalidated Closure +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness + %V0 = load volatile <2 x i64>, <2 x i64>* %Src0 + %EltA = extractelement <2 x i64> %V0, i32 0 + %V1 = load volatile <2 x i64>, <2 x i64>* %Src1 + %EltB = extractelement <2 x i64> %V1, i32 1 + %Res = add i64 %EltA, %EltB + store i64 %Res, i64* %Dst + ret void +} + +define void @fun3(<4 x i32>* %Src0, <4 x i32>* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: VL +; CHECK-NEXT: COPY +; CHECK-NEXT: VL +; CHECK-NEXT: COPY +; CHECK-NEXT: VAF +; CHECK-NEXT: VSTEF {{.*}}noreg, 1 + %V0 = load volatile <4 x i32>, <4 x i32>* %Src0 + %EltA = extractelement <4 x i32> %V0, i32 1 + %V1 = load volatile <4 x i32>, <4 x i32>* %Src1 + %EltB = extractelement <4 x i32> %V1, i32 1 + %Res = add i32 %EltA, %EltB + store i32 %Res, i32* %Dst + ret void +} + +define void @fun4(<4 x i32>* %Src0, <4 x i32>* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: No lanes: {{.*}} ARK +; CHECK-NEXT: --- Invalidated Closure +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun4: IsSSA, TracksLiveness + %V0 = load volatile <4 x i32>, <4 x i32>* %Src0 + %EltA = extractelement <4 x i32> %V0, i32 0 + %V1 = load volatile <4 x i32>, <4 x i32>* %Src1 + %EltB = extractelement <4 x i32> %V1, i32 2 + %Res = add i32 %EltA, %EltB + store i32 %Res, i32* %Dst + ret void +} + +;;;; Compares can only be done in specific lane (G0 / F1). 
+ +define void @fun5(<2 x i64> %Vec, i64* %Src) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness +; CHECK: bb.0.entry: +; CHECK: VLEG {{.*}} $noreg, 0 +; CHECK-NEXT: VECG +; CHECK: bb.1.bb1: +; CHECK: VLGVG {{.*}} $noreg, 1 +; CHECK-NEXT: CG +entry: + %E0 = extractelement <2 x i64> %Vec, i32 0 + %L0 = load volatile i64, i64* %Src + %CC0 = icmp eq i64 %L0, %E0 + br i1 %CC0, label %bb1, label %bb3 + +bb1: + %E1 = extractelement <2 x i64> %Vec, i32 1 + %L1 = load volatile i64, i64* %Src + %CC1 = icmp eq i64 %L1, %E1 + br i1 %CC1, label %bb2, label %bb3 + +bb2: + %L2 = load volatile i64, i64* %Src + br label %bb3 + +bb3: + ret void +} + +define void @fun6(<4 x i32> %Vec, i32* %Src) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: bb.0.entry: +; CHECK: VLGVF {{.*}} $noreg, 0 +; CHECK: CMux +; CHECK: bb.1.bb1: +; CHECK: VLEF {{.*}} $noreg, 1 +; CHECK-NEXT: VECF +entry: + %E0 = extractelement <4 x i32> %Vec, i32 0 + %L0 = load volatile i32, i32* %Src + %CC0 = icmp eq i32 %L0, %E0 + br i1 %CC0, label %bb1, label %bb3 + +bb1: + %E1 = extractelement <4 x i32> %Vec, i32 1 + %L1 = load volatile i32, i32* %Src + %CC1 = icmp eq i32 %L1, %E1 + br i1 %CC1, label %bb2, label %bb3 + +bb2: + %L2 = load volatile i32, i32* %Src + br label %bb3 + +bb3: + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-11.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-11.ll @@ -0,0 +1,343 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \ +; RUN: -verify-machineinstrs -domain-gprlim=0 2>&1 | FileCheck %s +; REQUIRES: asserts +; +; Test domain reassignments involving insertion of scalar results into closures. + +; Insert one scalar value into closure. +define void @fun0(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:gr64bit]] = MSGRKC +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLVGG {{.*}}, [[REG0]], $noreg, 0 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG3:%[0-9]+:vr128bit]] = VAG [[REG1]], [[REG2]] +; CHECK-NEXT: VSTEG killed [[REG3]]{{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Prd = mul i64 %LHS, %RHS + %Prd1 = mul i64 %Prd, %RHS + %RHS2 = load i64, i64* %Src2 + %Sum = add i64 %Prd1, %RHS2 + store i64 %Sum, i64* %Dst + ret void +} + +; Insert one scalar value into closure using it twice. 
+define void @fun1(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Src3, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:gr64bit]] = MSGRKC +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLVGG {{.*}}, [[REG0]], $noreg, 0 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG3:%[0-9]+:vr128bit]] = VAG [[REG1]], [[REG2]] +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK-NEXT: [[REG5:%[0-9]+:vr128bit]] = VAG [[REG1]], [[REG4]] +; CHECK-NEXT: [[REG6:%[0-9]+:vr128bit]] = VAG killed [[REG3]], killed [[REG5]] +; CHECK-NEXT: VSTEG killed [[REG6]]{{.*}}noreg, 0 + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Prd = mul i64 %LHS, %RHS + %Prd1 = mul i64 %Prd, %RHS + %RHS2 = load i64, i64* %Src2 + %Sum = add i64 %Prd1, %RHS2 + %RHS3 = load i64, i64* %Src3 + %Sum2 = add i64 %Prd1, %RHS3 + %Sum3 = add i64 %Sum, %Sum2 + store i64 %Sum3, i64* %Dst + ret void +} + +; A scalar value used in closure as well as outside of it. +define void @fun2(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Src3, i64* %Dst, i64* %Dst2) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:gr64bit]] = MSGRKC +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLVGG {{.*}}, [[REG0]], $noreg, 0 +; CHECK: VAG [[REG1]] +; CHECK: [[REG2:%[0-9]+:gr128bit]] = INSERT_SUBREG {{.*}} [[REG0]] +; CHECK: DSG [[REG2]] + + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Prd = mul i64 %LHS, %RHS + %Prd1 = mul i64 %Prd, %RHS + %RHS2 = load i64, i64* %Src2 + %Sum = add i64 %Prd1, %RHS2 + store i64 %Sum, i64* %Dst + %Div = load i64, i64* %Src3 + %D = sdiv i64 %Prd1, %Div + store i64 %D, i64* %Dst2 + ret void +} + +; mul result is inserted into a closure but scalar value also used +; TODO +; define i64 @fun2b(i64 %arg0, i64 %arg1, i64* %Dst, i64 %arg2, i64 %Ref) { +; entry: +; br label %bb + +; bb: +; %P = mul i64 %arg0, %arg1 +; %res = sub i64 %P, %arg2 +; store volatile i64 %res, i64* %Dst +; %c = icmp ne i64 %Ref, 0 +; br i1 %c, label %exit, label %bb + +; exit: +; ret i64 %P +; } + +; A scalar value used in closure, but scalar instruction using value defined +; in closure. +define void @fun3(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness +; CHECK: Unable to reassign: Extraction{{.*}}MSG +; CHECK: Unable to reassign: Extraction{{.*}}MSGRKC +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun3: IsSSA, TracksLiveness + %LHS = load i64, i64* %Src0 + %RHS = load i64, i64* %Src1 + %Prd = mul i64 %LHS, %RHS + %RHS2 = load i64, i64* %Src2 + %Prd1 = mul i64 %Prd, %RHS2 + %Sum = add i64 %Prd1, %RHS2 + store i64 %Sum, i64* %Dst + ret void +} + +; A scalar value inserted into vector element G1. 
+define void @fun4(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Src3, i64* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun4: IsSSA, TracksLiveness +; CHECK: VLEG {{.*}}noreg, 0 +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VUPHF +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 1 +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VAG [[REG0]], [[REG1]] +; CHECK: [[REG3:%[0-9]+:gr64bit]] = MSGRKC +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VLVGG {{.*}} [[REG3]], $noreg, 1 +; CHECK: [[REG5:%[0-9]+:vr128bit]] = VAG killed [[REG2]], killed [[REG4]] +; CHECK: VSTEG killed [[REG5]]{{.*}}noreg, 1 + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + %S0 = sext i32 %T0 to i64 ; G1 + + %L1 = load i64, i64* %Src1 + %R1 = add i64 %S0, %L1 + + %LHS = load i64, i64* %Src2 + %RHS = load i64, i64* %Src3 + %Prd = mul i64 %LHS, %RHS + %Prd1 = mul i64 %Prd, %RHS ; MSGRKC + + %Res = add i64 %R1, %Prd1 + store i64 %Res, i64* %Dst + ret void +} + +; A scalar value inserted into vector element F1. +define void @fun5(i64* %Src0, float* %Src1, i32* %Dst) { +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun5: IsSSA, TracksLiveness +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLEG {{.*}}noreg, 0 +; CHECK: [[REG1:%[0-9]+:vr128bit]] = COPY [[REG0]] +; CHECK: [[REG2:%[0-9]+:gr32bit]] = nofpexcept CFEBR +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VLVGF {{.*}} [[REG2]], $noreg, 1 +; CHECK: [[REG4:%[0-9]+:vr128bit]] = VAF killed [[REG1]], killed [[REG3]] +; CHECK: VSTEF killed [[REG4]]{{.*}}noreg, 1 + %L0 = load volatile i64, i64* %Src0 + %T = trunc i64 %L0 to i32 ; F1 + + %L1 = load float, float* %Src1 + %I1 = fptosi float %L1 to i32 + + %Res = add i32 %T, %I1 + store i32 %Res, i32* %Dst + ret void +} + +; A scalar value needs insertion but the two users require different lanes. +define void @fun6(i64* %Src0, i64* %Src1, i64* %Src2, i64* %Src3, + double* %Src4, i64* %Dst1, i64* %Dst2) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun6: IsSSA, TracksLiveness +; CHECK: No lanes +; CHECK-NEXT: Invalidated Closure +; CHECK-NOT: Reassigning closure +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun6: IsSSA, TracksLiveness + %L0 = load volatile i64, i64* %Src0 + %T0 = trunc i64 %L0 to i32 + %S0 = sext i32 %T0 to i64 + + %L1 = load i64, i64* %Src1 + %R1 = add i64 %S0, %L1 ; G1 + + %LHS = load i64, i64* %Src2 + %RHS = load i64, i64* %Src3 + %Prd = mul i64 %LHS, %RHS + %Prd1 = mul i64 %Prd, %RHS ; MSGRKC + + %L4 = load double, double* %Src4 + %I4 = fptosi double %L4 to i64 ; G0 + + %A1 = add i64 %R1, %Prd1 ; Conflict + store i64 %A1, i64* %Dst1 + + %A2 = add i64 %I4, %Prd1 ; Conflict + store i64 %A2, i64* %Dst2 + + ret void +} + +;; Extractions TODO + +; Closure in loop crossing call outside of the loop. Extraction outside of +; loop. Not reassigned (for now at least), due to multiple users. 
+define void @fun7(i64* %Src0, i64* %Src1, i64* %Dst, i64 %Ref) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness +; CHECK: Unable to reassign: {{.*}} HasCallsUsers +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun7: IsSSA, TracksLiveness + +entry: + br label %bb + +bb: + %lhs = load i64, i64* %Src0 + %rhs = load i64, i64* %Src1 + %res = add i64 %lhs, %rhs + %R2 = add i64 %res, %Ref + store i64 %R2, i64* %Dst + %c = icmp ne i64 %Ref, 0 + br i1 %c, label %exit, label %bb + +exit: + call void @foo(i64 %R2); + store volatile i64 %R2, i64* %Dst + ret void +} + +; Call inside loop: should not be reassigned. +define void @fun8(i64* %Src0, i64* %Src1, i64* %Dst, i64 %Ref) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun8: IsSSA, TracksLiveness +; CHECK: Unable to reassign: {{.*}} HasCalls +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun8: IsSSA, TracksLiveness +entry: + br label %bb + +bb: + %lhs = load i64, i64* %Src0 + %rhs = load i64, i64* %Src1 + %res = add i64 %lhs, %rhs + %R2 = add i64 %res, %Ref + call void @foo2(); + store i64 %R2, i64* %Dst + %c = icmp ne i64 %Ref, 0 + br i1 %c, label %exit, label %bb + +exit: + ret void +} + +; Extraction needed in loop: should not be reassigned. +define void @fun9(i64* %Src0, i64* %Src1, i64* %Dst, i64 %Ref, i64 %F) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun9: IsSSA, TracksLiveness +; CHECK: Unable to reassign: {{.*}} Extraction, (MSGRKC +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun9: IsSSA, TracksLiveness +entry: + br label %bb + +bb: + %lhs = load i64, i64* %Src0 + %rhs = load i64, i64* %Src1 + %res = add i64 %lhs, %rhs + %R2 = add i64 %res, %Ref + %P = mul i64 %R2, %F + store i64 %P, i64* %Dst + %c = icmp ne i64 %Ref, 0 + br i1 %c, label %exit, label %bb + +exit: + ret void +} + +; (scalar) mul after loop: %lhs extracted and %P reinserted to vector domain. +define void @fun10(i64 %arg0, i64 %arg1, i64* %Dst, i64 %arg2, i64 %Ref, i64* %Src0) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun10: IsSSA, TracksLiveness +; CHECK: --- Picked closure for reassignment +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun10: IsSSA, TracksLiveness +; CHECK: bb.0.entry: +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLVGG +; CHECK: bb.1.bb: +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEG +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VAG [[REG1]], [[REG0]] +; CHECK: VSTEG [[REG2]] +; CHECK: bb.2.exit: +; CHECK: [[REG3:%[0-9]+:gr64bit]] = VLGVG [[REG1]] +; CHECK: [[REG4:%[0-9]+:gr64bit]] = MSGRKC {{.*}} [[REG3]] +; CHECK: [[REG5:%[0-9]+:vr128bit]] = VLVGG {{.*}} [[REG4]] +; CHECK: [[REG6:%[0-9]+:vr128bit]] = VAG killed [[REG5]], [[REG2]] +; CHECK: VSTEG killed [[REG6]] + +entry: + br label %bb + +bb: + %lhs = load i64, i64* %Src0 + %res = add i64 %lhs, %arg0 + store volatile i64 %res, i64* %Dst + %c = icmp ne i64 %Ref, 0 + br i1 %c, label %exit, label %bb + +exit: + %P = mul i64 %arg0, %lhs + %A = add i64 %P, %res + store volatile i64 %A, i64* %Dst + ret void +} + +; Reassigned register used in loop and extracted after it (address use). 
+define void @fun11(i64* %Src, i64* %Dst, i64 %Ref, i64** %Dst2) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun11: IsSSA, TracksLiveness +; CHECK: --- Picked closure for reassignment +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun11: IsSSA, TracksLiveness +; CHECK: bb.0.entry: +; CHECK: [[REG0:%[0-9]+:vr128bit]] = VLVGG +; CHECK: bb.1.bb: +; CHECK: [[REG1:%[0-9]+:vr128bit]] = VLEG +; CHECK: [[REG2:%[0-9]+:vr128bit]] = VESLG killed [[REG1]] +; CHECK: [[REG3:%[0-9]+:vr128bit]] = VAG [[REG0]], killed [[REG2]] +; CHECK: VSTEG [[REG3]] +; CHECK: bb.2.exit: +; CHECK: [[REG4:%[0-9]+:addr64bit]] = VLGVG [[REG3]] +; CHECK: STG {{.*}}, [[REG4]], 0, $noreg + +entry: + br label %bb + +bb: + %Val = load volatile i64, i64* %Src + %Ptr = getelementptr i64, i64* %Dst, i64 %Val + store volatile i64* %Ptr, i64** %Dst2 + %CC = icmp eq i64 %Ref, 0 + br i1 %CC, label %bb, label %exit + +exit: + store volatile i64 %Ref, i64* %Ptr + ret void +} + + +declare void @foo(i64 %V) +declare void @foo2() Index: llvm/test/CodeGen/SystemZ/domain-reassignment-12.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-12.ll @@ -0,0 +1,177 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -verify-machineinstrs \ +; RUN: -domain-gprlim=0 2>&1 | FileCheck %s +; +; Test that FP-lane is used in cases crossing calls. + +; Crossing call: reassign and avoid spill by using FP-lane. +define void @fun0(i64* %Src, i64 *%Dst) { +; CHECK-LABEL: fun0: +; CHECK: vleg %v8 +; CHECK: brasl +; CHECK: vsteg %v8 + %L = load i64, i64* %Src + call void @foo() + store i64 %L, i64* %Dst + ret void +} + +; Crossing call before loop: spill around the loop or there would be a copy +; in the loop (don't use FP-lane). +define i64 @fun1(i64* %Src0, i64* %Dst, i64 %Ref) { +; CHECK-LABEL: fun1: +; CHECK: vleg +; CHECK: vst {{.*}} Spill +; CHECK: brasl +; CHECK: vl {{.*}} Reload +; CHECK-LABEL: .LBB1_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsteg +; CHECK-NEXT: cgije {{.*}} .LBB1_1 +entry: + %lhs = load i64, i64* %Src0 + call void @foo(); + br label %loop + +loop: + %iv = phi i64 [ %lhs, %entry ], [ %inc, %loop ] + %inc = add i64 %iv, 1 + store volatile i64 %lhs, i64* %Dst + %CC = icmp eq i64 %Ref, 0 + br i1 %CC, label %loop, label %exit + +exit: + ret i64 0 +} + +; Crossing call inside loop with def outside: Don't reassign - with FP-lane +; the COPY would remain but a vector reg would have to be reloaded. +define i64 @fun2(i64* %Src0, i64* %Dst, i64 %Ref) { +; CHECK-LABEL: fun2: +; CHECK: lg [[REG:%r[0-9]+]] +; CHECK-LABEL: .LBB2_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: brasl +; CHECK-NEXT: stg [[REG]] +; CHECK-NEXT: cgije {{.*}} .LBB2_1 + +entry: + %lhs = load i64, i64* %Src0 + br label %loop + +loop: + %iv = phi i64 [ %lhs, %entry ], [ %inc, %loop ] + %inc = add i64 %iv, 1 + call void @foo(); + store volatile i64 %lhs, i64* %Dst + %CC = icmp eq i64 %Ref, 0 + br i1 %CC, label %loop, label %exit + +exit: + ret i64 0 +} + +; Crossing live-through loop with a call inside: don't reassign. 
+define i64 @fun3(i64* %Src0, i64* %Dst, i64 %Ref) {
+; CHECK-LABEL: fun3:
+; CHECK: lg [[REG:%r[0-9]+]]
+; CHECK-LABEL: .LBB3_1:
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: brasl
+; CHECK-NEXT: cgije {{.*}} .LBB3_1
+; CHECK-LABEL: # %bb.2:
+; CHECK-NEXT: stg [[REG]]
+
+entry:
+ %lhs = load volatile i64, i64* %Src0
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i64 %iv, 1
+ call void @foo();
+ %CC = icmp eq i64 %Ref, 0
+ br i1 %CC, label %loop, label %exit
+
+exit:
+ store volatile i64 %lhs, i64* %Dst
+ ret i64 0
+}
+
+; def and use crossing call inside loop: reassign.
+define i64 @fun4(i64* %Src0, i64* %Dst, i64 %Ref) {
+; CHECK-LABEL: fun4:
+; CHECK-LABEL: .LBB4_1:
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK: vleg %v8
+; CHECK: brasl
+; CHECK: vsteg %v8
+; CHECK-NEXT: cgije {{.*}} .LBB4_1
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i64 %iv, 1
+ %lhs = load volatile i64, i64* %Src0
+ call void @foo();
+ store volatile i64 %lhs, i64* %Dst
+ %CC = icmp eq i64 %Ref, 0
+ br i1 %CC, label %loop, label %exit
+
+exit:
+ ret i64 0
+}
+
+; def in loop with a call, used after loop: reassign.
+define i64 @fun5(i64* %Src0, i64* %Dst, i64 %Ref) {
+; CHECK-LABEL: fun5:
+; CHECK-LABEL: .LBB5_1:
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK: vleg %v8
+; CHECK: brasl
+; CHECK-NEXT: cgije {{.*}} .LBB5_1
+; CHECK-LABEL: # %bb.2:
+; CHECK: vsteg %v8
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i64 %iv, 1
+ %lhs = load volatile i64, i64* %Src0
+ call void @foo();
+ %CC = icmp eq i64 %Ref, 0
+ br i1 %CC, label %loop, label %exit
+
+exit:
+ store volatile i64 %lhs, i64* %Dst
+ ret i64 0
+}
+
+; def crossing a call and then used by multiple PHIs: don't reassign.
+define i32 @fun6(i32 %arg) {
+; CHECK-LABEL: fun6:
+; CHECK: lb %r13
+; CHECK: brasl
+; CHECK: stc %r13
+bb1:
+ br i1 undef, label %bb2, label %bb3
+
+bb2:
+ %i = load i8, i8* undef, align 4
+ call void @foo()
+ br i1 undef, label %bb4, label %bb3
+
+bb3:
+ %i5 = phi i8 [ %i, %bb2 ], [ undef, %bb1 ]
+ br label %bb4
+
+bb4:
+ %i7 = phi i8 [ %i5, %bb3 ], [ %i, %bb2 ]
+ %i8 = phi i32 [ 0, %bb3 ], [ 1, %bb2 ]
+ store i8 %i7, i8* undef, align 4
+ ret i32 %i8
+}
+
+
+declare void @foo()
Index: llvm/test/CodeGen/SystemZ/domain-reassignment-13.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/domain-reassignment-13.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -verify-machineinstrs 2>&1 \
+; RUN: -domain-gprlim=0 | FileCheck %s
+;
+; Test that inserted loads of constants are hoisted out of loops.
+
+; Disabled for now...
+define void @fun0(<4 x i8> %B15) {
+; CHECK-LABEL: fun0
+; XHECK: vzero
+; XHECK: vleif
+; XHECK: =>This Inner Loop Header
+BB:
+ br label %CF34
+
+CF34:
+ %Tr24 = trunc <4 x i8> %B15 to <4 x i1>
+ %E28 = extractelement <4 x i1> %Tr24, i32 3
+ br i1 %E28, label %CF34, label %CF36
+
+CF36:
+ ret void
+}
+
+define void @fun1(i64* %Src0, i64* %Src1, i64* %Dst, i1 %Cond) {
+; CHECK-LABEL: fun1
+; XHECK: vleig
+; XHECK: =>This Inner Loop Header
+entry:
+ br label %loop
+
+loop:
+ %LHS = load i64, i64* %Src0
+ %RHS = load i64, i64* %Src1
+ %Sum = sub i64 %LHS, %RHS
+ %Res = add i64 %Sum, -16
+ store i64 %Res, i64* %Dst
+ br i1 %Cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; TODO: This is disabled because running MachineCSE again (on otherwise
+; unpatched trunk) gave a regression.
+define void @fun2(i32* %Src0, i32* %Src1, i32* %Src2, i32* %Dst, i1 %Cond) {
+; CHECK-LABEL: fun2
+; XHECK: vleif
+; XHECK-NOT: vleif
+entry:
+ %LHS = load i32, i32* %Src0
+ %RHS = load i32, i32* %Src1
+ %Sum = sub i32 %LHS, %RHS
+ %A = add i32 %Sum, -16
+
+ %L2 = load i32, i32* %Src2
+ %S2 = sub i32 %LHS, %L2
+ %A2 = add i32 %S2, -16
+
+ %Res = mul i32 %A, %A2
+ store i32 %Res, i32* %Dst
+ ret void
+}
+
+; TODO: An instruction that has a converter that does not accept it should be
+; inserted with -domreass-inserts
+define void @fun3(i64 %arg, i64* %Dst) {
+ store i64 %arg, i64* %Dst
+ ret void
+}
+
+; Test involving a Load Address
+define void @fun4(i64* %Src, i64* %Dst) {
+; CHECK-LABEL: fun4
+; CHECK: vleg
+; CHECK: veslg
+; CHECK: vleig
+; CHECK: vag
+; CHECK: vsteg
+ %i = load i64, i64* %Src
+ %i4 = shl i64 %i, 8
+ %i5 = or i64 %i4, 255
+ store i64 %i5, i64* %Dst
+ ret void
+}
+
+; ; Test involving RISBGN TODO
+; define void @fun5(i64* %Src, i64* %Dst) {
+; %L = load i64, i64* %Src
+; %A = and i64 %L, 3
+; store i64 %A, i64* %Dst
+; ret void
+; }
+
Index: llvm/test/CodeGen/SystemZ/domain-reassignment-14.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/domain-reassignment-14.ll
@@ -0,0 +1,250 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -debug-only=systemz-domain-reassignment \
+; RUN: -verify-machineinstrs 2>&1 | FileCheck %s
+;
+
+; Only reassign a sufficient number of closures.
+define void @fun0(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness +; CHECK: --- Reassigning closure: +; CHECK: --- Reassigning closure: +; CHECK: --- Reassigning closure: +; CHECK: --- Reassigning closure: +; CHECK: --- Reassigning closure: +; CHECK-NOT: --- Reassigning closure: +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun0: IsSSA, TracksLiveness + + %val0 = load volatile i64, i64 *%Src + %val1 = load volatile i64, i64 *%Src + %val2 = load volatile i64, i64 *%Src + %val3 = load volatile i64, i64 *%Src + %val4 = load volatile i64, i64 *%Src + %val5 = load volatile i64, i64 *%Src + %val6 = load volatile i64, i64 *%Src + %val7 = load volatile i64, i64 *%Src + %val8 = load volatile i64, i64 *%Src + %val9 = load volatile i64, i64 *%Src + %val10 = load volatile i64, i64 *%Src + %val11 = load volatile i64, i64 *%Src + %val12 = load volatile i64, i64 *%Src + %val13 = load volatile i64, i64 *%Src + %val14 = load volatile i64, i64 *%Src + %val15 = load volatile i64, i64 *%Src + %val16 = load volatile i64, i64 *%Src + %val17 = load volatile i64, i64 *%Src + %val18 = load volatile i64, i64 *%Src + + store volatile i64 %val0, i64* %Dst + store volatile i64 %val1, i64* %Dst + store volatile i64 %val2, i64* %Dst + store volatile i64 %val3, i64* %Dst + store volatile i64 %val4, i64* %Dst + store volatile i64 %val5, i64* %Dst + store volatile i64 %val6, i64* %Dst + store volatile i64 %val7, i64* %Dst + store volatile i64 %val8, i64* %Dst + store volatile i64 %val9, i64* %Dst + store volatile i64 %val10, i64* %Dst + store volatile i64 %val11, i64* %Dst + store volatile i64 %val12, i64* %Dst + store volatile i64 %val13, i64* %Dst + store volatile i64 %val14, i64* %Dst + store volatile i64 %val15, i64* %Dst + store volatile i64 %val16, i64* %Dst + store volatile i64 %val17, i64* %Dst + store volatile i64 %val18, i64* %Dst + + ret void +} + +; Do not reassign closure in loop - regs are live-through +; Do not reassign closure used in loop. 
+define void @fun1(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness + +; CHECK: bb.0 (%ir-block.0): +; CHECK: %0:gr64bit = LG %19:addr64bit +; CHECK: %1:gr64bit = LG %19:addr64bit + +; CHECK: bb.1.for.body: +; CHECK: %21:gr64bit = LG %19:addr64bit +; CHECK: STG killed %21:gr64bit +; CHECK: CGHI %0:gr64bit, 0 + +; CHECK: bb.2.exit: +; CHECK: STG %0:gr64bit + +; CHECK: Entering bb.0 +; CHECK-NOT: --- Picked closure for reassignment: Registers: %0 +; CHECK: Entering bb.1.for.body +; CHECK-NOT: --- Picked closure for reassignment: +; CHECK: Entering bb.2.exit +; CHECK-NOT: --- Picked closure for reassignment: + +; CHECK: ***** Machine Function after Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun1: IsSSA, TracksLiveness + %val0 = load volatile i64, i64 *%Src + %val1 = load volatile i64, i64 *%Src + %val2 = load volatile i64, i64 *%Src + %val3 = load volatile i64, i64 *%Src + %val4 = load volatile i64, i64 *%Src + %val5 = load volatile i64, i64 *%Src + %val6 = load volatile i64, i64 *%Src + %val7 = load volatile i64, i64 *%Src + %val8 = load volatile i64, i64 *%Src + %val9 = load volatile i64, i64 *%Src + %val10 = load volatile i64, i64 *%Src + %val11 = load volatile i64, i64 *%Src + %val12 = load volatile i64, i64 *%Src + %val13 = load volatile i64, i64 *%Src + %val14 = load volatile i64, i64 *%Src + %val15 = load volatile i64, i64 *%Src + %val16 = load volatile i64, i64 *%Src + %val17 = load volatile i64, i64 *%Src + %val18 = load volatile i64, i64 *%Src + br label %for.body + +for.body: + + %val19 = load volatile i64, i64 *%Src + store volatile i64 %val19, i64* %Dst + %cmp = icmp eq i64 %val0, 0 + br i1 %cmp, label %for.body, label %exit + +exit: + store volatile i64 %val0, i64* %Dst + store volatile i64 %val1, i64* %Dst + store volatile i64 %val2, i64* %Dst + store volatile i64 %val3, i64* %Dst + store volatile i64 %val4, i64* %Dst + store volatile i64 %val5, i64* %Dst + store volatile i64 %val6, i64* %Dst + store volatile i64 %val7, i64* %Dst + store volatile i64 %val8, i64* %Dst + store volatile i64 %val9, i64* %Dst + store volatile i64 %val10, i64* %Dst + store volatile i64 %val11, i64* %Dst + store volatile i64 %val12, i64* %Dst + store volatile i64 %val13, i64* %Dst + store volatile i64 %val14, i64* %Dst + store volatile i64 %val15, i64* %Dst + store volatile i64 %val16, i64* %Dst + store volatile i64 %val17, i64* %Dst + store volatile i64 %val18, i64* %Dst + + ret void +} + +; Do not reassign closure in loop that is only live-through +define void @fun2(i64* %Src, i64* %Dst) { +; CHECK: ***** Machine Function before Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness + +; CHECK: bb.0 (%ir-block.0): +; CHECK: %0:gr64bit = LG %3:addr64bit +; CHECK: %1:gr64bit = LG %3:addr64bit +; CHECK: %2:gr64bit = LG %3:addr64bit + +; CHECK: bb.1.for.body: +; CHECK: STG %1:gr64bit +; CHECK-NOT: %2:gr64bit +; CHECK: CGHI %0:gr64bit, 0 + +; CHECK: bb.2.exit: +; CHECK: STG %0:gr64bit +; CHECK: STG %1:gr64bit +; CHECK: STG %2:gr64bit + +; CHECK: Entering bb.0 +; CHECK-NOT: --- Picked closure for reassignment: +; CHECK: Entering bb.1.for.body +; CHECK: Picked closure for reassignment: Registers: %1 +; CHECK: Picked closure for reassignment: Registers: %0 +; CHECK-NOT: --- Picked closure for reassignment: +; CHECK: Entering bb.2.exit +; CHECK-NOT: --- Picked closure for reassignment: + +; CHECK: ***** Machine Function after 
Domain Reassignment ***** +; CHECK-NEXT: # Machine code for function fun2: IsSSA, TracksLiveness + + %val0 = load volatile i64, i64 *%Src + %val1 = load volatile i64, i64 *%Src + %val2 = load volatile i64, i64 *%Src + br label %for.body + +for.body: + + %val3 = load volatile i64, i64 *%Src + %P3 = mul i64 %val3, 3 + + %val4 = load volatile i64, i64 *%Src + %P4 = mul i64 %val4, 3 + + %val5 = load volatile i64, i64 *%Src + %P5 = mul i64 %val5, 3 + + %val6 = load volatile i64, i64 *%Src + %P6 = mul i64 %val6, 3 + + %val7 = load volatile i64, i64 *%Src + %P7 = mul i64 %val7, 3 + + %val8 = load volatile i64, i64 *%Src + %P8 = mul i64 %val8, 3 + + %val9 = load volatile i64, i64 *%Src + %P9 = mul i64 %val9, 3 + + %val10 = load volatile i64, i64 *%Src + %P10 = mul i64 %val10, 3 + + %val11 = load volatile i64, i64 *%Src + %P11 = mul i64 %val11, 3 + + %val12 = load volatile i64, i64 *%Src + %P12 = mul i64 %val12, 3 + + %val13 = load volatile i64, i64 *%Src + %P13 = mul i64 %val13, 3 + + %val14 = load volatile i64, i64 *%Src + %P14 = mul i64 %val14, 3 + + %val15 = load volatile i64, i64 *%Src + %P15 = mul i64 %val15, 3 + + %val16 = load volatile i64, i64 *%Src + %P16 = mul i64 %val16, 3 + + %val17 = load volatile i64, i64 *%Src + %P17 = mul i64 %val17, 3 + + store volatile i64 %val1, i64* %Dst + store volatile i64 %P3, i64* %Dst + store volatile i64 %P4, i64* %Dst + store volatile i64 %P5, i64* %Dst + store volatile i64 %P6, i64* %Dst + store volatile i64 %P7, i64* %Dst + store volatile i64 %P8, i64* %Dst + store volatile i64 %P9, i64* %Dst + store volatile i64 %P10, i64* %Dst + store volatile i64 %P11, i64* %Dst + store volatile i64 %P12, i64* %Dst + store volatile i64 %P13, i64* %Dst + store volatile i64 %P14, i64* %Dst + store volatile i64 %P15, i64* %Dst + store volatile i64 %P16, i64* %Dst + store volatile i64 %P17, i64* %Dst + + %cmp = icmp eq i64 %val0, 0 + br i1 %cmp, label %for.body, label %exit + +exit: + store volatile i64 %val0, i64* %Dst + store volatile i64 %val1, i64* %Dst + store volatile i64 %val2, i64* %Dst + + ret void +} Index: llvm/test/CodeGen/SystemZ/domain-reassignment-15.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/domain-reassignment-15.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -verify-machineinstrs \ +; RUN: -domain-gprlim=0 2>&1 | FileCheck %s + +; Test reassignment of comparisons to Vector Element Compare. 
+
+define void @fun0(i64* %Src0, i64* %Src1, i64* %Dst) {
+; CHECK-LABEL: fun0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vleg %v0, 0(%r2), 0
+; CHECK-NEXT: vleg %v1, 0(%r3), 0
+; CHECK-NEXT: vecg %v0, %v1
+; CHECK-NEXT: jlh .LBB0_2
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: vleig %v1, 8, 0
+; CHECK-NEXT: vecg %v0, %v1
+; CHECK-NEXT: vsteg %v0, 0(%r4), 0
+; CHECK-NEXT: ber %r14
+; CHECK-NEXT: .LBB0_2: # %bb2
+; CHECK-NEXT: vsteg %v0, 0(%r4), 0
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i64, i64* %Src0
+ %L1 = load volatile i64, i64* %Src1
+ %cc = icmp eq i64 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i64 %L0, i64* %Dst
+ %cc2 = icmp eq i64 %L0, 8
+ br i1 %cc2, label %bb1b, label %bb2
+
+bb1b:
+ ret void
+
+bb2:
+ store volatile i64 %L0, i64* %Dst
+ ret void
+}
+
+define void @fun1(i32* %Src0, i32* %Src1, i32* %Dst) {
+; CHECK-LABEL: fun1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vlef %v0, 0(%r2), 1
+; CHECK-NEXT: vlef %v1, 0(%r3), 1
+; CHECK-NEXT: vecf %v0, %v1
+; CHECK-NEXT: jlh .LBB1_2
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: vleif %v1, 8, 1
+; CHECK-NEXT: vecf %v0, %v1
+; CHECK-NEXT: vstef %v0, 0(%r4), 1
+; CHECK-NEXT: ber %r14
+; CHECK-NEXT: .LBB1_2: # %bb2
+; CHECK-NEXT: vstef %v0, 0(%r4), 1
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i32, i32* %Src0
+ %L1 = load volatile i32, i32* %Src1
+ %cc = icmp eq i32 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i32 %L0, i32* %Dst
+ %cc2 = icmp eq i32 %L0, 8
+ br i1 %cc2, label %bb1b, label %bb2
+
+bb1b:
+ ret void
+
+bb2:
+ store volatile i32 %L0, i32* %Dst
+ ret void
+}
+
+define void @fun2(i64* %Src0, i64* %Src1, i64* %Dst) {
+; CHECK-LABEL: fun2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vleg %v0, 0(%r2), 0
+; CHECK-NEXT: vleg %v1, 0(%r3), 0
+; CHECK-NEXT: veclg %v0, %v1
+; CHECK-NEXT: jhe .LBB2_2
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: vleig %v1, 9, 0
+; CHECK-NEXT: veclg %v0, %v1
+; CHECK-NEXT: vsteg %v0, 0(%r4), 0
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB2_2: # %bb2
+; CHECK-NEXT: vsteg %v0, 0(%r4), 0
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i64, i64* %Src0
+ %L1 = load volatile i64, i64* %Src1
+ %cc = icmp ult i64 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i64 %L0, i64* %Dst
+ %cc2 = icmp ugt i64 %L0, 8
+ br i1 %cc2, label %bb1b, label %bb2
+
+bb1b:
+ ret void
+
+bb2:
+ store volatile i64 %L0, i64* %Dst
+ ret void
+}
+
+define void @fun3(i32* %Src0, i32* %Src1, i32* %Dst) {
+; CHECK-LABEL: fun3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vlef %v0, 0(%r2), 1
+; CHECK-NEXT: vlef %v1, 0(%r3), 1
+; CHECK-NEXT: veclf %v0, %v1
+; CHECK-NEXT: jhe .LBB3_2
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: vleif %v1, 9, 1
+; CHECK-NEXT: veclf %v0, %v1
+; CHECK-NEXT: vstef %v0, 0(%r4), 1
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB3_2: # %bb2
+; CHECK-NEXT: vstef %v0, 0(%r4), 1
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i32, i32* %Src0
+ %L1 = load volatile i32, i32* %Src1
+ %cc = icmp ult i32 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i32 %L0, i32* %Dst
+ %cc2 = icmp ugt i32 %L0, 8
+ br i1 %cc2, label %bb1b, label %bb2
+
+bb1b:
+ ret void
+
+bb2:
+ store volatile i32 %L0, i32* %Dst
+ ret void
+}
+
+define void @fun4(i64* %Src0, i64* %Src1, i64* %Dst) {
+; CHECK-LABEL: fun4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vleg %v0, 0(%r2), 0
+; CHECK-NEXT: vleg %v1, 0(%r3), 0
+; CHECK-NEXT: vecg %v0, %v1
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB4_1: # %bb1
+; CHECK-NEXT: mvghi 0(%r4), 0
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i64, i64* %Src0
+ %L1 = load volatile i64, i64* %Src1
+ %cc = icmp eq i64 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i64 0, i64* %Dst
+ br label %bb2
+
+bb2:
+ ret void
+}
+
+define void @fun5(i32* %Src0, i32* %Src1, i32* %Dst) {
+; CHECK-LABEL: fun5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vlef %v0, 0(%r2), 1
+; CHECK-NEXT: vlef %v1, 0(%r3), 1
+; CHECK-NEXT: vecf %v0, %v1
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB5_1: # %bb1
+; CHECK-NEXT: mvhi 0(%r4), 0
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i32, i32* %Src0
+ %L1 = load volatile i32, i32* %Src1
+ %cc = icmp eq i32 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i32 0, i32* %Dst
+ br label %bb2
+
+bb2:
+ ret void
+}
+
+define void @fun6(i64* %Src0, i64* %Src1, i64* %Dst) {
+; CHECK-LABEL: fun6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vleg %v0, 0(%r2), 0
+; CHECK-NEXT: vleg %v1, 0(%r3), 0
+; CHECK-NEXT: veclg %v0, %v1
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB6_1: # %bb1
+; CHECK-NEXT: mvghi 0(%r4), 0
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i64, i64* %Src0
+ %L1 = load volatile i64, i64* %Src1
+ %cc = icmp ugt i64 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i64 0, i64* %Dst
+ br label %bb2
+
+bb2:
+ ret void
+}
+
+define void @fun7(i32* %Src0, i32* %Src1, i32* %Dst) {
+; CHECK-LABEL: fun7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vlef %v0, 0(%r2), 1
+; CHECK-NEXT: vlef %v1, 0(%r3), 1
+; CHECK-NEXT: veclf %v0, %v1
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB7_1: # %bb1
+; CHECK-NEXT: mvhi 0(%r4), 0
+; CHECK-NEXT: br %r14
+entry:
+ %L0 = load volatile i32, i32* %Src0
+ %L1 = load volatile i32, i32* %Src1
+ %cc = icmp ugt i32 %L0, %L1
+ br i1 %cc, label %bb1, label %bb2
+
+bb1:
+ store volatile i32 0, i32* %Dst
+ br label %bb2
+
+bb2:
+ ret void
+}
Index: llvm/test/CodeGen/SystemZ/knownbits.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/knownbits.ll
+++ llvm/test/CodeGen/SystemZ/knownbits.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-domreass < %s | FileCheck %s

 ; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode().
Index: llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
+++ llvm/test/CodeGen/SystemZ/stack-clash-protection.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -disable-domreass | FileCheck %s
 ;
 ; Test stack clash protection probing for static allocas.
Index: llvm/test/CodeGen/SystemZ/subregliveness-04.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/subregliveness-04.ll
+++ llvm/test/CodeGen/SystemZ/subregliveness-04.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp \
+; RUN: -systemz-subreg-liveness -disable-domreass < %s | FileCheck %s

 ; Check for successful compilation.
 ; CHECK: lhi %r0, -5
Index: llvm/test/CodeGen/SystemZ/tls-08.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/tls-08.ll
+++ llvm/test/CodeGen/SystemZ/tls-08.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O0 \
-; RUN: -stop-before=regallocfast 2>&1 | FileCheck %s
+; RUN: -stop-before=regallocfast -disable-domreass 2>&1 | FileCheck %s
 ; RUN: llc < %s -mcpu=z196 -mtriple=s390x-linux-gnu -O3 \
-; RUN: -stop-before=livevars 2>&1 | FileCheck %s
+; RUN: -stop-before=livevars -disable-domreass 2>&1 | FileCheck %s
 ;
 ; Test that copies to/from access registers are handled before regalloc with
 ; GR32 regs.
Index: llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
+++ llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -domain-force-vlgv=false \
+; RUN: | FileCheck %s
 ;
 ; Check that a widening truncate to a vector of i1 elements can be handled.