Index: include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- include/llvm/CodeGen/TargetRegisterInfo.h +++ include/llvm/CodeGen/TargetRegisterInfo.h @@ -818,6 +818,23 @@ const LiveRegMatrix *Matrix = nullptr) const; + /// Let target reject replacing the previously allocated phys reg with \p + /// NewPhysReg, by returning false. This would typically happen where + /// target has taken care in getRegAllocationHints() to have only a subset + /// of the available registers made available and it is important not to + /// lose this restriction. A better solution would probably be to do like + /// in gcc and provide legal operand assignment combinations at the + /// instruction level. On SystemZ some 32 bit instructions can use either + /// high or low parts of 64 bit registers, but some operand combinations + /// are not possible. For instance, CRMux (Compare Register) can be lowered + /// to CR (LOW, LOW), CHHR (HIGH, HIGH) or CHLR (HIGH, LOW), but (LOW, HIGH) + /// is *not* supported. + virtual bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap &VRM) const { + return true; + } + /// A callback to allow target a chance to update register allocation hints /// when a register is "changed" (e.g. coalesced) to another register. /// e.g. On ARM, some virtual registers should target register pairs, Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -2966,6 +2966,13 @@ LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); continue; } + + // Check with target for any special replacements to avoid. + if (!TRI->allowHintRecoloring(Reg, PhysReg, *VRM)) { + LLVM_DEBUG(dbgs() << "=> Not suitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is // equal, we consider this is profitable because it may expose // more recoloring opportunities. 
Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -1170,7 +1170,9 @@ if (addRegAssignmentOptimized()) { // Allow targets to expand pseudo instructions depending on the choice of - // registers before MachineCopyPropagation. + // registers. If special requirements are involved (see comment for + // allowHintRecoloring()), it is important that this is done before + // MachineCopyPropagation. addPostRewrite(); // Copy propagate to forward register uses and try to eliminate COPYs that Index: lib/Target/SystemZ/CMakeLists.txt =================================================================== --- lib/Target/SystemZ/CMakeLists.txt +++ lib/Target/SystemZ/CMakeLists.txt @@ -17,7 +17,6 @@ SystemZCallingConv.cpp SystemZConstantPoolValue.cpp SystemZElimCompare.cpp - SystemZExpandPseudo.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp SystemZISelDAGToDAG.cpp Index: lib/Target/SystemZ/SystemZ.h =================================================================== --- lib/Target/SystemZ/SystemZ.h +++ lib/Target/SystemZ/SystemZ.h @@ -190,7 +190,6 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); -FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); Index: lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- lib/Target/SystemZ/SystemZElimCompare.cpp +++ lib/Target/SystemZ/SystemZElimCompare.cpp @@ -103,14 +103,6 @@ } // end anonymous namespace -// Return true if CC is live out of MBB. 
-static bool isCCLiveOut(MachineBasicBlock &MBB) { - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; - return false; -} - // Returns true if MI is an instruction whose output equals the value in Reg. static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { switch (MI.getOpcode()) { @@ -598,7 +590,7 @@ // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. - bool CompleteCCUsers = !isCCLiveOut(MBB); + bool CompleteCCUsers = !SystemZRegisterInfo::isCCLiveOut(MBB); SmallVector CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { Index: lib/Target/SystemZ/SystemZExpandPseudo.cpp =================================================================== --- lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZSubtarget.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -using namespace llvm; - -#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" - -namespace llvm { - void initializeSystemZExpandPseudoPass(PassRegistry&); -} - -namespace { -class SystemZExpandPseudo : public MachineFunctionPass { -public: - static char ID; - SystemZExpandPseudo() : MachineFunctionPass(ID) { - initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry()); - } - - const SystemZInstrInfo *TII; - - bool runOnMachineFunction(MachineFunction &Fn) override; - - StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); - bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); -}; -char SystemZExpandPseudo::ID = 0; -} - -INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo", - SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) - -/// Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) { - return new SystemZExpandPseudo(); -} - -// MI is a load-register-on-condition pseudo instruction that could not be -// handled as a single hardware instruction. Replace it by a branch sequence. 
-bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineFunction &MF = *MBB.getParent(); - const BasicBlock *BB = MBB.getBasicBlock(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned CCValid = MI.getOperand(3).getImm(); - unsigned CCMask = MI.getOperand(4).getImm(); - - LivePhysRegs LiveRegs(TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); - - // Splice MBB at MI, moving the rest of the block into RestMBB. - MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); - MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); - RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); - RestMBB->transferSuccessors(&MBB); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - RestMBB->addLiveIn(*I); - - // Create a new block MoveMBB to hold the move instruction. - MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); - MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); - MoveMBB->addLiveIn(SrcReg); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MoveMBB->addLiveIn(*I); - - // At the end of MBB, create a conditional branch to RestMBB if the - // condition is false, otherwise fall through to MoveMBB. - BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) - .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); - MBB.addSuccessor(RestMBB); - MBB.addSuccessor(MoveMBB); - - // In MoveMBB, emit an instruction to move SrcReg into DestReg, - // then fall through to RestMBB. 
- TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, - MI.getOperand(2).isKill()); - MoveMBB->addSuccessor(RestMBB); - - NextMBBI = MBB.end(); - MI.eraseFromParent(); - return true; -} - -/// If MBBI references a pseudo instruction that should be expanded here, -/// do the expansion and return true. Otherwise return false. -bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineInstr &MI = *MBBI; - switch (MI.getOpcode()) { - case SystemZ::LOCRMux: - return expandLOCRMux(MBB, MBBI, NextMBBI); - default: - break; - } - return false; -} - -/// Iterate over the instructions in basic block MBB and expand any -/// pseudo instructions. Return true if anything was modified. -bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= expandMI(MBB, MBBI, NMBBI); - MBBI = NMBBI; - } - - return Modified; -} - -bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast(MF.getSubtarget().getInstrInfo()); - - bool Modified = false; - for (auto &MBB : MF) - Modified |= expandMBB(MBB); - return Modified; -} - Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -4592,6 +4592,17 @@ let OpType = "reg"; } +// A GRX32 binop pseudo expanded after regalloc. 
+class BinaryRRX32Pseudo + : Pseudo<(outs GRX32:$R1), (ins GRX32:$R2, GRX32:$R3), + [(set GRX32:$R1, (operator GRX32:$R2, GRX32:$R3))]> { + let Predicates = [FeatureHighWord]; + let NumOpsKey = lowmnemonic; + let NumOpsValue = "3"; + let OpKey = lowmnemonic#"k"#GR32; + let OpType = "reg"; +} + // Like BinaryRI, but expanded after RA depending on the choice of register. class BinaryRIPseudo @@ -4656,6 +4667,16 @@ let AccessBytes = bytes; } +// A GRX32 compare pseudo expanded after regalloc. +class CompareRRX32Pseudo + : Pseudo<(outs), (ins GRX32:$R1, GRX32:$R2), + [(set CC, (operator GRX32:$R1, GRX32:$R2))]> { + let isCompare = 1; + let Predicates = [FeatureHighWord]; + let OpKey = lowmnemonic#GR32; + let OpType = "reg"; +} + // Like TestBinarySIL, but expanded later. class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -142,6 +142,8 @@ } // end namespace SystemZII namespace SystemZ { +// Note: getTwoOperandOpcode() also contains mappings from GRX32Mux pseudos +// to the corresponding 2-address instructions, for instance ARMuxK -> AR. 
int getTwoOperandOpcode(uint16_t Opcode); int getTargetMemOpcode(uint16_t Opcode); } @@ -160,8 +162,6 @@ unsigned HighOpcode) const; void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; - void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -46,7 +46,6 @@ #include "SystemZGenInstrInfo.inc" #define DEBUG_TYPE "systemz-II" -STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { @@ -200,29 +199,6 @@ MI.setDesc(get(Opcode)); } -// MI is a load-register-on-condition pseudo instruction. Replace it with -// LowOpcode if source and destination are both low GR32s and HighOpcode if -// source and destination are both high GR32s. -void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); - - if (!DestIsHigh && !SrcIsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && SrcIsHigh) - MI.setDesc(get(HighOpcode)); - else - LOCRMuxJumps++; - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. 
-} - // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. @@ -1013,6 +989,7 @@ MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); unsigned Opcode = MI.getOpcode(); @@ -1167,54 +1144,94 @@ // If the spilled operand is the final one or the instruction is // commutable, try to change R into . + bool NeedsCommute = false; unsigned NumOps = MI.getNumExplicitOperands(); int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode == -1) + return nullptr; + + // A function to get the mapped physical reg or subreg from MO. + auto getPhysReg = [&](MachineOperand &MO) -> unsigned { + unsigned Reg = MO.getReg(); + unsigned PhysReg = (TRI->isVirtualRegister(Reg) ? VRM->getPhys(Reg) : Reg); + if (PhysReg && MO.getSubReg()) + PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); + return PhysReg; + }; // See if this is a 3-address instruction that is convertible to 2-address // and suitable for folding below. Only try this with virtual registers // and a provided VRM (during regalloc). - bool NeedsCommute = false; - if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) { + if (SystemZ::getTwoOperandOpcode(Opcode) != -1) { if (VRM == nullptr) - MemOpcode = -1; - else { - assert(NumOps == 3 && "Expected two source registers."); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned DstPhys = - (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); - unsigned SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() - : ((OpNum == 1 && MI.isCommutable()) - ? 
MI.getOperand(2).getReg() - : 0)); - if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && - TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg)) - NeedsCommute = (OpNum == 1); - else - MemOpcode = -1; - } + return nullptr; + assert(NumOps == 3 && "Expected two source registers."); + unsigned DstPhys = getPhysReg(MI.getOperand(0)); + unsigned SrcPhys = (OpNum == 2 ? getPhysReg(MI.getOperand(1)) + : ((OpNum == 1 && MI.isCommutable()) + ? getPhysReg(MI.getOperand(2)) + : 0)); + if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && + DstPhys == SrcPhys) + NeedsCommute = (OpNum == 1); + else + return nullptr; + } + // Handle GRX32 compares, which can be folded with either a low or high + // reg. If the register operand has not yet an allocated register it is + // still possible to proceed. It should be possible to simply use the "low" + // opcode for a GRX32 reg and have the register class constrained (below). + else if(Opcode == SystemZ::CRMux || Opcode == SystemZ::CLRMux) { + if (VRM == nullptr) + return nullptr; + // TODO: Swap operands if CC is not live-out? + MachineOperand &SrcMO = MI.getOperand(0); + unsigned SrcPhys = getPhysReg(SrcMO); + if ((SrcPhys && SystemZ::GRH32BitRegClass.contains(SrcPhys)) || + (!SrcPhys && + (SrcMO.getSubReg() == SystemZ::subreg_h32 || + MRI->getRegClass(SrcMO.getReg()) == &SystemZ::GRH32BitRegClass))) + MemOpcode = (MemOpcode == SystemZ::C ? 
SystemZ::CHF : SystemZ::CLHF); } - if (MemOpcode >= 0) { - if ((OpNum == NumOps - 1) || NeedsCommute) { - const MCInstrDesc &MemDesc = get(MemOpcode); - uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); - assert(AccessBytes != 0 && "Size of access should be known"); - assert(AccessBytes <= Size && "Access outside the frame index"); - uint64_t Offset = Size - AccessBytes; - MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, - MI.getDebugLoc(), get(MemOpcode)); - MIB.add(MI.getOperand(0)); - if (NeedsCommute) - MIB.add(MI.getOperand(2)); - else - for (unsigned I = 1; I < OpNum; ++I) - MIB.add(MI.getOperand(I)); - MIB.addFrameIndex(FrameIndex).addImm(Offset); - if (MemDesc.TSFlags & SystemZII::HasIndex) - MIB.addReg(0); - transferDeadCC(&MI, MIB); - return MIB; + if ((OpNum == NumOps - 1) || NeedsCommute) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(MemOpcode)); + MIB.add(MI.getOperand(0)); + if (NeedsCommute) + MIB.add(MI.getOperand(2)); + else + for (unsigned I = 1; I < OpNum; ++I) + MIB.add(MI.getOperand(I)); + // Constrain GRX32 registers to match the new instruction. 
+ for (unsigned Idx = 0; Idx < MIB->getNumOperands(); ++Idx) { + MachineOperand &MO = MIB->getOperand(Idx); + if (!MO.isReg() || MO.isImplicit()) + break; + const TargetRegisterClass *DescRC = + getRegClass(MIB->getDesc(), Idx, TRI, MF); + if (DescRC == &SystemZ::GR32BitRegClass || + DescRC == &SystemZ::GRH32BitRegClass) { + unsigned Reg = MO.getReg(); + if (TRI->isVirtualRegister(Reg) && + MRI->getRegClass(Reg) == &SystemZ::GRX32BitRegClass) { + MRI->setRegClass(Reg, DescRC); + unsigned PhysReg = VRM->getPhys(Reg); + assert((!PhysReg || DescRC->contains(PhysReg)) && + "Bad reg/opcode match"); + } + } } + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + transferDeadCC(&MI, MIB); + return MIB; } return nullptr; @@ -1281,10 +1298,6 @@ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); return true; - case SystemZ::LOCRMux: - expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); - return true; - case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1365,14 +1378,6 @@ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); return true; - case SystemZ::CHIMux: - expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false); - return true; - - case SystemZ::CFIMux: - expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false); - return true; - case SystemZ::CLFIMux: expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false); return true; Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -890,6 +890,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of a register. let isCommutable = 1 in { + // Expands to AR(K), AHHHR or AHHLR, depending on the choice of registers. 
+ def ARMuxK : BinaryRRX32Pseudo; defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_sadd, GR64, GR64>; } @@ -932,6 +934,8 @@ let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { + // Expands to ALR(K), ALHHHR or ALHHLR, depending on the choice of registers + def ALRMuxK : BinaryRRX32Pseudo; defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uadd, GR64, GR64>; } @@ -991,6 +995,8 @@ // Subtraction producing a signed overflow flag. let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of a register. + // Expands to SR(K), SHHHR or SHHLR, depending on the choice of registers. + def SRMuxK : BinaryRRX32Pseudo; defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssub, GR64, GR64>; @@ -1040,6 +1046,8 @@ // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. + // Expands to SLR(K), SLHHHR or SLHHLR, depending on the choice of registers. + def SLRMuxK : BinaryRRX32Pseudo; defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usub, GR64, GR64>; @@ -1425,7 +1433,9 @@ // some of the signed forms have COMPARE AND BRANCH equivalents whereas none // of the unsigned forms do. let Defs = [CC], CCValues = 0xE in { - // Comparison with a register. + // Comparison with a register. CRMux expands to CR, CHHR or CHLR, + // depending on the choice of registers. + def CRMux : CompareRRX32Pseudo; def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>; def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>; @@ -1477,7 +1487,9 @@ // Unsigned comparisons. 
let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { - // Comparison with a register. + // Comparison with a register. CLRMux expands to CLR, CLHHR or CLHLR, + // depending on the choice of registers. + def CLRMux : CompareRRX32Pseudo; def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; Index: lib/Target/SystemZ/SystemZPostRewrite.cpp =================================================================== --- lib/Target/SystemZ/SystemZPostRewrite.cpp +++ lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -17,6 +17,7 @@ #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; @@ -25,6 +26,19 @@ #define DEBUG_TYPE "systemz-postrewrite" STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); +STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); +STATISTIC(LOCRs, "Number of LOCRs"); +STATISTIC(MUXLOWs, "Number of Muxes using only low parts"); +STATISTIC(MUXHIGHs, "Number of Muxes using only high parts"); +STATISTIC(MUXHILOs, "Number of Muxes using high and low parts"); +STATISTIC(BADMUXs, "Number of Muxes ending up in illegal registers"); +STATISTIC(MuxCopyDst, "Number of Mux pseudos needing a copy to dst."); +STATISTIC(MuxRotate2, "Number of Mux pseudos needing two rotates of a reg."); +STATISTIC(MuxRHSNeg, "Number of Mux pseudos negation of RHS (subtractions)."); +STATISTIC(MuxCmpSwap, "Number of Mux pseudo compares swapped."); +STATISTIC(MuxCmpNonSwappable, + "Number of Mux pseudo compares not swappable."); +STATISTIC(MuxCmpCCLiveOut, "Number of Mux pseudo compares with live out CC."); namespace llvm { void initializeSystemZPostRewritePass(PassRegistry&); @@ -40,20 +54,58 @@ } const SystemZInstrInfo *TII; + const 
TargetRegisterInfo *TRI; bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } private: + + struct MuxInfo { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineInstr *const MI; + bool Rotated; + bool DstRegUsed; + + MuxInfo(MachineInstr &mi_); + unsigned getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); } + unsigned getReg64(unsigned Idx) const; + bool isHIGH(unsigned Idx) const { + assert(Idx < (MI->isCompare() ? 2 : 3) && "Operand index out of range."); + return SystemZ::GRH32BitRegClass.contains(getReg(Idx)); + } + bool isLOW(unsigned Idx) const { return !isHIGH(Idx); } + bool sameReg(unsigned A, unsigned B) const { return getReg(A) == getReg(B); } + bool differentRegs(unsigned A, unsigned B) const { return !sameReg(A, B); } + bool sameRegs64(unsigned A, unsigned B) const { + return getReg64(A) == getReg64(B); + } + bool differentRegs64(unsigned A, unsigned B) const { + return !sameRegs64(A, B); + } + + void useDstRegForOps(unsigned Idx); + void RotateReg0(); + void copyRegToDst(unsigned Idx); + void commute(); + bool trySelectAddSubMux(); + bool trySelectCmpMux(); + }; + + bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAddSubMux(MachineBasicBlock::iterator MBBI); + bool expandCmpMux(MachineBasicBlock::iterator MBBI); bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool selectMBB(MachineBasicBlock &MBB); + bool updateLiveInLists_CC(MachineBasicBlock &MBB); }; char SystemZPostRewrite::ID = 0; @@ -68,11 +120,437 @@ return new SystemZPostRewrite(); } +SystemZPostRewrite::MuxInfo::MuxInfo(MachineInstr &mi_) : MI(&mi_) { + const TargetSubtargetInfo &Subtarget = + 
MI->getParent()->getParent()->getSubtarget(); + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + Rotated = false; + DstRegUsed = false; +} + +unsigned SystemZPostRewrite::MuxInfo::getReg64(unsigned Idx) const { + unsigned Reg = getReg(Idx); + unsigned SubRegIdx = (SystemZ::GR32BitRegClass.contains(Reg) ? + SystemZ::subreg_l32 : SystemZ::subreg_h32); + unsigned Reg64 = TRI->getMatchingSuperReg(Reg, SubRegIdx, + &SystemZ::GR64BitRegClass); + assert(Reg64 && "Could not find the supreregister of a GRX32 register."); + return Reg64; +} + +// Copy the source register at Idx into the dst-reg of MI and use dst-reg +// instead in any operands that use the copied source register. +void SystemZPostRewrite::MuxInfo::useDstRegForOps(unsigned Idx) { + assert(MI->getOperand(0).isDef() && "Expected a defined register to use."); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned ReplacedReg = getReg(Idx); + unsigned DstReg = getReg(0); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::COPY), DstReg) + .addReg(ReplacedReg); + for (unsigned OpIdx = 1; OpIdx < 3; ++OpIdx) { + MachineOperand &MO = MI->getOperand(OpIdx); + if (MO.getReg() == ReplacedReg) + MO.setReg(DstReg); + } + MuxCopyDst++; + DstRegUsed = true; +} + +void SystemZPostRewrite::MuxInfo::RotateReg0() { + MachineBasicBlock::iterator InsPos = MI; + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg64 = getReg64(0); + if (!Rotated) { + unsigned Reg = getReg(0); + unsigned OtherSubRegIdx = + isLOW(0) ? SystemZ::subreg_h32 : SystemZ::subreg_l32; + unsigned OtherReg = TRI->getSubReg(Reg64, OtherSubRegIdx); + unsigned NumOps = (MI->isCompare() ? 
2 : 3); + bool Reg0Used = false; + for (unsigned Idx = 0; Idx < NumOps; ++Idx) { + MachineOperand &MO = MI->getOperand(Idx); + if (MO.getReg() == Reg) { + MO.setReg(OtherReg); + Reg0Used |= MO.isUse(); + } + else if (MO.getReg() == OtherReg) + MO.setReg(Reg); + } + // Rotate the 64-bit register. Mark it as undef and add an extra use + // operand for the 32-bit register if used. + MachineInstrBuilder MIB = + BuildMI(*MBB, InsPos, DL, TII->get(SystemZ::RLLG), Reg64) + .addReg(Reg64, getUndefRegState(true)) + .addReg(0).addImm(32); + if (Reg0Used) + MIB.addReg(Reg, RegState::Implicit); + Rotated = true; + } else { + // Rotate the 64-bit register back after MI. + BuildMI(*MBB, ++InsPos, DL, TII->get(SystemZ::RLLG), Reg64) + .addReg(Reg64) + .addReg(0).addImm(32); + } +} + +void SystemZPostRewrite::MuxInfo::copyRegToDst(unsigned Idx) { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned SrcReg = getReg(Idx); + unsigned DstReg = getReg(0); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::COPY), DstReg) + .addReg(SrcReg); +} + +void SystemZPostRewrite::MuxInfo::commute() { + // If MI is a subtraction, negate RHS and convert to addition. 
+ bool ConvertedToAddition = true; + switch(MI->getOpcode()) { + case SystemZ::SRMuxK: + MI->setDesc(TII->get(SystemZ::ARMuxK)); + break; + case SystemZ::SLRMuxK: + MI->setDesc(TII->get(SystemZ::ALRMuxK)); + break; + default: + ConvertedToAddition = false; + break; + } + if (ConvertedToAddition) { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = getReg(2); + if (isLOW(2)) { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LNR), Reg) + .addReg(Reg); + } else { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::XIHF), Reg) + .addReg(Reg) + .addImm(0xffffffff); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::AIH), Reg) + .addReg(Reg) + .addImm(1); + } + MuxRHSNeg++; + } + + TII->commuteInstruction(*MI, false/*NewMI*/, 1, 2); +} + +bool SystemZPostRewrite::MuxInfo::trySelectAddSubMux() { + unsigned LowOpcode, HiOpcode, HiLowOpcode; + unsigned Opc = MI->getOpcode(); + if (Opc == SystemZ::ARMuxK) { + LowOpcode = SystemZ::ARK; + HiOpcode = SystemZ::AHHHR; + HiLowOpcode = SystemZ::AHHLR; + } else if (Opc == SystemZ::ALRMuxK) { + LowOpcode = SystemZ::ALRK; + HiOpcode = SystemZ::ALHHHR; + HiLowOpcode = SystemZ::ALHHLR; + } else if (Opc == SystemZ::SRMuxK) { + LowOpcode = SystemZ::SRK; + HiOpcode = SystemZ::SHHHR; + HiLowOpcode = SystemZ::SHHLR; + } else if (Opc == SystemZ::SLRMuxK) { + LowOpcode = SystemZ::SLRK; + HiOpcode = SystemZ::SLHHHR; + HiLowOpcode = SystemZ::SLHHLR; + } else + llvm_unreachable("Bad opcode."); + + // Try to find an opcode to use. + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1) && isLOW(2)) + Opcode = (getReg(0) == getReg(1) ? + SystemZ::getTwoOperandOpcode(LowOpcode) : LowOpcode); + else if (isHIGH(0) && isHIGH(1) && isHIGH(2)) + Opcode = HiOpcode; + else if (isHIGH(0) && isHIGH(1) && isLOW(2)) + Opcode = HiLowOpcode; + else + return false; + MI->setDesc(TII->get(Opcode)); + + // Make sure that the tied-to flag is correctly set or unset. 
+ bool IsTied = MI->getOperand(1).isTied(); + bool ShouldBeTied = + (MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0); + if (IsTied && !ShouldBeTied) + MI->untieRegOperand(1); + else if (!IsTied && ShouldBeTied) + MI->tieOperands(0, 1); + + return true; +} + +bool SystemZPostRewrite::MuxInfo::trySelectCmpMux() { + unsigned LowOpcode, HiOpcode, HiLowOpcode; + if (MI->getOpcode() == SystemZ::CRMux) { + LowOpcode = SystemZ::CR; + HiOpcode = SystemZ::CHHR; + HiLowOpcode = SystemZ::CHLR; + } else if (MI->getOpcode() == SystemZ::CLRMux) { + LowOpcode = SystemZ::CLR; + HiOpcode = SystemZ::CLHHR; + HiLowOpcode = SystemZ::CLHLR; + } else + llvm_unreachable("Bad opcode."); + + // Try to find an opcode to use. + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1)) + Opcode = LowOpcode; + else if (isHIGH(0) && isHIGH(1)) + Opcode = HiOpcode; + else if (isHIGH(0) && isLOW(1)) + Opcode = HiLowOpcode; + else + return false; + MI->setDesc(TII->get(Opcode)); + + return true; +} + +// MI is a load-register-on-condition pseudo instruction that could not be +// handled as a single hardware instruction. Replace it by a branch sequence. +bool SystemZPostRewrite::expandLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + + LivePhysRegs LiveRegs(TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. 
+ MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + RestMBB->addLiveIn(*I); + + // Create a new block MoveMBB to hold the move instruction. + MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MoveMBB->addLiveIn(*I); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. + BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. + TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, + MI.getOperand(2).isKill()); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + return true; +} + +bool SystemZPostRewrite::expandAddSubMux(MachineBasicBlock::iterator MBBI) { + MuxInfo MuxI(*MBBI); + bool IsAdd = MBBI->getDesc().isCommutable(); + if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isLOW(2)) + MUXLOWs++; + else if (MuxI.isHIGH(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2)) + MUXHIGHs++; + else if (MuxI.isHIGH(0) && (MuxI.isHIGH(1) || MuxI.isHIGH(2))) { + if (MuxI.isHIGH(2)) { // HLH is unsupported. + if (IsAdd) + MUXHILOs++; + else + BADMUXs++; + } else + MUXHILOs++; + } + else + BADMUXs++; + + // 1. See if legal already (after a free commute if needed). + if (IsAdd && MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.commute(); + if (MuxI.trySelectAddSubMux()) + return true; + // 2. 
Use destination register if not used by any source operand and if it + // would make operands legal. + if (MuxI.differentRegs(0, 1) && MuxI.differentRegs(0, 2)) { + if (MuxI.isLOW(0)) { + if (MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.useDstRegForOps(2); // LLH -> LLL + else if (MuxI.isHIGH(1) && (MuxI.isLOW(2) || MuxI.sameReg(1, 2))) + MuxI.useDstRegForOps(1); // LHL/LH0H0 -> LLL + } + else if (MuxI.isLOW(1)) + MuxI.useDstRegForOps(1); // HLX -> HHX + } + // 3. Handle the remaining cases + if (!MuxI.DstRegUsed) { + if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 1) && "Case with free dst reg already handled."); + MuxI.RotateReg0(); // LLH -> HHH + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isLOW(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + if (MuxI.sameRegs64(0, 1)) + MuxI.commute(); // LHL -> LLH + MuxI.RotateReg0(); // LHL/LLH -> HHX + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2)) { + if (MuxI.sameRegs64(0, 1)) { + assert(!MuxI.sameRegs64(0, 2) && + "Case with free dst reg already handled."); + // Instead of L0H0H -> rotate -> H0L0H -> use-dst -> H0H0H, just copy + // H0 to L0, which achieves the same result of a duplicated value + // into both L0 and H0. 
+ MuxI.copyRegToDst(1); + MuxI.MI->getOperand(0).setReg(MuxI.getReg(1)); + MuxI.Rotated = true; + } + else + MuxI.RotateReg0(); // LHH -> HHH/HHL + } + else if (MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + assert(!IsAdd && "Add was handled earlier."); + if (MuxI.differentRegs64(0, 1)) + MuxI.RotateReg0(); // HLH -> LLL + else + MuxI.commute(); // HLH -> HHL + } + } + + bool Success = MuxI.trySelectAddSubMux(); + assert(Success && "Expected a legal register combination by now."); + if (MuxI.Rotated) { + MuxI.RotateReg0(); + MuxRotate2++; + } + + return true; +} + +bool SystemZPostRewrite::expandCmpMux(MachineBasicBlock::iterator MBBI) { + MachineBasicBlock *MBB = MBBI->getParent(); + // MBBI is a 32 bit compare with register pseudo instruction. Replace it + // with one of the three possible opcodes. "LowHi" is not supported. + MuxInfo MuxI(*MBBI); + if (MuxI.isLOW(0) && MuxI.isLOW(1)) + MUXLOWs++; + else if (MuxI.isHIGH(0) && MuxI.isHIGH(1)) + MUXHIGHs++; + else if (MuxI.isHIGH(0) && MuxI.isLOW(1)) + MUXHILOs++; + else + BADMUXs++; + + // 1. See if legal already + if (MuxI.trySelectCmpMux()) + return true; + assert(MuxI.isLOW(0) && MuxI.isHIGH(1) && "Only Cmp Lo/Hi is illegal"); + + // 2. Try to swap the operands if possible. + if (!SystemZRegisterInfo::isCCLiveOut(*MBB)) { + bool Swappable = true; + SmallVector CCUsers; + for (MachineBasicBlock::iterator Itr = std::next(MBBI); + Itr != MBB->end(); ++Itr) { + if (Itr->readsRegister(SystemZ::CC)) { + unsigned Flags = Itr->getDesc().TSFlags; + if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) + CCUsers.push_back(&*Itr); + else { + Swappable = false; + MuxCmpNonSwappable++; + break; + } + } + if (Itr->definesRegister(SystemZ::CC)) + break; + } + if (Swappable) { + assert(CCUsers.size() && "No CC users found?"); + // Update all CC users. 
+ for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? + 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); + switch(CCMaskMO.getImm()) { + case SystemZ::CCMASK_CMP_LT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GT); + break; + case SystemZ::CCMASK_CMP_GT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LT); + break; + case SystemZ::CCMASK_CMP_LE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GE); + break; + case SystemZ::CCMASK_CMP_GE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LE); + break; + case SystemZ::CCMASK_CMP_EQ: + case SystemZ::CCMASK_CMP_NE: + break; + default: + llvm_unreachable("Unexpected CCMask value."); + break; + } + } + // The compare is not marked as commutable, so swap the registers and + // flags here. + MachineOperand &LHS = MuxI.MI->getOperand(0); + MachineOperand &RHS = MuxI.MI->getOperand(1); + MachineOperand Tmp = MachineOperand(LHS); + LHS.setReg(RHS.getReg()); + RHS.setReg(Tmp.getReg()); + LHS.setIsKill(RHS.isKill()); + RHS.setIsKill(Tmp.isKill()); + LHS.setIsUndef(RHS.isUndef()); + RHS.setIsUndef(Tmp.isUndef()); + + bool Success = MuxI.trySelectCmpMux(); + assert(Success && "Expected a legal register combination by now."); + MuxCmpSwap++; + return true; + } + } + else + MuxCmpCCLiveOut++; + + // 3. Use rotations. + MuxI.RotateReg0(); + bool Success = MuxI.trySelectCmpMux(); + assert(Success && "Expected a legal register combination by now."); + MuxI.RotateReg0(); + MuxRotate2++; + return true; +} + /// If MBBI references a pseudo instruction that should be selected here, /// do it and return true. Otherwise return false. 
bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); @@ -94,6 +572,48 @@ return true; } + switch (Opcode) { + case SystemZ::LOCRMux: { + MuxInfo MuxI(MI); + if (MuxI.isLOW(0) && MuxI.isLOW(2)) + MI.setDesc(TII->get(SystemZ::LOCR)); + else if (MuxI.isHIGH(0) && MuxI.isHIGH(2)) + MI.setDesc(TII->get(SystemZ::LOCFHR)); + else { + expandLOCRMux(MBB, MBBI, NextMBBI); + LOCRMuxJumps++; + return true; + } + LOCRs++; + return true; + } + + case SystemZ::CHIMux: { + bool IsHigh = SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(IsHigh ? SystemZ::CIH :SystemZ::CHI)); + return true; + } + + case SystemZ::CFIMux: { + bool IsHigh = SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(IsHigh ? SystemZ::CIH :SystemZ::CFI)); + return true; + } + + case SystemZ::CRMux: + case SystemZ::CLRMux: + return expandCmpMux(MBBI); + + case SystemZ::ARMuxK: + case SystemZ::ALRMuxK: + case SystemZ::SRMuxK: + case SystemZ::SLRMuxK: + return expandAddSubMux(MBBI); + + default: + break; + } + return false; } @@ -112,8 +632,44 @@ return Modified; } +bool SystemZPostRewrite::updateLiveInLists_CC(MachineBasicBlock &MBB) { + if (MBB.isLiveIn(SystemZ::CC)) + return false; + bool Missing = false; + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + for (; MBBI != E; ++MBBI) { + if (MBBI->isDebugInstr()) + continue; + if (MBBI->readsRegister(SystemZ::CC)) { + Missing = true; + break; + } + if (MBBI->definesRegister(SystemZ::CC)) + break; + } + if (Missing || (MBBI == E && SystemZRegisterInfo::isCCLiveOut(MBB))) { + assert(&MBB != &*MBB.getParent()->begin() && + "CC can not be live-in in the entry block of the function."); + LLVM_DEBUG(dbgs() << "SystemZPostRewrite: Adding missing CC to MBB " + << 
"live-in list!\n"); + MBB.addLiveIn(SystemZ::CC); + return true; + } + return false; +} + bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + + // We must know about all CC users when expanding CRMux, so better make + // sure all live-in CCs are in the live-in lists. + bool Change = true; + while (Change) { + Change = false; + for (auto &MBB : MF) + Change |= updateLiveInLists_CC(MBB); + } bool Modified = false; for (auto &MBB : MF) Index: lib/Target/SystemZ/SystemZRegisterInfo.h =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.h +++ lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,12 +49,19 @@ const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + static bool isCCLiveOut(MachineBasicBlock &MBB); + const TargetRegisterClass* getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) const; bool getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap &VRM) const override; // Override TargetRegisterInfo.h. 
bool requiresRegisterScavenging(const MachineFunction &MF) const override { Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -21,16 +21,25 @@ #define GET_REGINFO_TARGET_DESC #include "SystemZGenRegisterInfo.inc" +#define DEBUG_TYPE "regalloc" + SystemZRegisterInfo::SystemZRegisterInfo() : SystemZGenRegisterInfo(SystemZ::R14D) {} +bool SystemZRegisterInfo::isCCLiveOut(MachineBasicBlock &MBB) { + for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + // Given that MO is a GRX32 operand, return either GR32 or GRH32 if MO // somehow belongs in it. Otherwise, return GRX32. -static const TargetRegisterClass *getRC32(MachineOperand &MO, - const VirtRegMap *VRM, - const MachineRegisterInfo *MRI) { +const TargetRegisterClass* +SystemZRegisterInfo::getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) const { const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); - if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || MO.getSubReg() == SystemZ::subreg_l32 || MO.getSubReg() == SystemZ::subreg_hl32) @@ -49,30 +58,38 @@ return &SystemZ::GRH32BitRegClass; } - assert (RC == &SystemZ::GRX32BitRegClass); + assert(RC == &SystemZ::GRX32BitRegClass); return RC; } // Pass the registers of RC as hints while making sure that if any of these -// registers are copy hints (and therefore already in Hints), hint them -// first. +// registers are already hinted (for example as copy hints) and therefore +// already in Hints, hint them first. 
static void addHints(ArrayRef Order, SmallVectorImpl &Hints, const TargetRegisterClass *RC, const MachineRegisterInfo *MRI) { - SmallSet CopyHints; - CopyHints.insert(Hints.begin(), Hints.end()); + SmallSet Priority; + Priority.insert(Hints.begin(), Hints.end()); Hints.clear(); for (MCPhysReg Reg : Order) - if (CopyHints.count(Reg) && + if (Priority.count(Reg) && RC->contains(Reg) && !MRI->isReserved(Reg)) Hints.push_back(Reg); for (MCPhysReg Reg : Order) - if (!CopyHints.count(Reg) && + if (!Priority.count(Reg) && RC->contains(Reg) && !MRI->isReserved(Reg)) Hints.push_back(Reg); } +// EXPERIMENTAL +#include "llvm/Support/CommandLine.h" +static cl::opt HARDHIORLO("hard-hi-or-lo", cl::init(false)); +static cl::opt SOFTHIORLO("soft-hi-or-lo", cl::init(false)); +static cl::opt HARD_LT_HINTS("hard-lt-hints", cl::init(false)); +static cl::opt HARD_LL_CH_HINTS("hard-ll-ch-hints", cl::init(false)); +static cl::opt MAXREGS("maxregs", cl::init(100)); + bool SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, @@ -81,110 +98,339 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const SystemZSubtarget &Subtarget = MF.getSubtarget(); - const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); + // Add any two address hints after any copy hints. 
+ SmallSet TwoAddrHints; + if (VRM != nullptr) + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) { + unsigned Opcode = Use.getOpcode(); + int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(Opcode); + if (TwoOperandOpcode != -1) { + const MachineOperand *VRRegMO = nullptr; + const MachineOperand *OtherMO = nullptr; + const MachineOperand *CommuMO = nullptr; + if (VirtReg == Use.getOperand(0).getReg()) { + VRRegMO = &Use.getOperand(0); + OtherMO = &Use.getOperand(1); + if (Use.isCommutable()) + CommuMO = &Use.getOperand(2); + } else if (VirtReg == Use.getOperand(1).getReg()) { + VRRegMO = &Use.getOperand(1); + OtherMO = &Use.getOperand(0); + } else if (VirtReg == Use.getOperand(2).getReg() && + Use.isCommutable()) { + VRRegMO = &Use.getOperand(2); + OtherMO = &Use.getOperand(0); + } else + continue; + + const TargetRegisterClass *TwoOperandRC = + TII->getRegClass(TII->get(TwoOperandOpcode), 0, this, MF); + + auto tryAddHint = [&](const MachineOperand *MO) -> void { + unsigned Reg = MO->getReg(); + unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + if (PhysReg) { + if (MO->getSubReg()) + PhysReg = getSubReg(PhysReg, MO->getSubReg()); + if (!TwoOperandRC->contains(PhysReg)) + return; + if (VRRegMO->getSubReg()) + PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), + MRI->getRegClass(VirtReg)); + if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) + TwoAddrHints.insert(PhysReg); + } + }; + tryAddHint(OtherMO); + if (CommuMO) + tryAddHint(CommuMO); + } + } + for (MCPhysReg OrderReg : Order) + if (TwoAddrHints.count(OrderReg)) + Hints.push_back(OrderReg); + + // Avoid expensive fallbacks for GRX32 pseudos by passing hard hints. if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { - SmallVector Worklist; - SmallSet DoneRegs; + + // A map from virtual registers to their register classes. The mapped + // value typically goes through these states: + // end() : before analyzeOperands() has been called. 
+ // GRX32 : the typical first value after analyzeOperands(). + // GR32/GRH32 : result of calling constrainReg(). + // nullptr : result of calling constrainReg() again with opposite RC. + struct Reg2RCMap : std::map { + unsigned VirtReg_; + + // SoftHintRC is a secondary (soft) hint that is not propagated. + const TargetRegisterClass *SoftHintRC; + + Reg2RCMap(unsigned Reg) : VirtReg_(Reg) { SoftHintRC = nullptr; } + void constrainReg(unsigned Reg, const TargetRegisterClass *RC, + bool HardHint = true) { + assert(find(Reg) != end() && "Reg uninitialized"); + assert((RC == &SystemZ::GR32BitRegClass || + RC == &SystemZ::GRH32BitRegClass) && + "Should constrain to either low or high parts."); + if (!HardHint) { + if (Reg == VirtReg_) + SoftHintRC = RC; + return; + } + if ((*this)[Reg] == &SystemZ::GRX32BitRegClass) + (*this)[Reg] = RC; + else if ((*this)[Reg] != RC) + // Don't hint either way if the opposite RC is also needed. + (*this)[Reg] = nullptr; + } + bool isConstrained(unsigned Reg) { + assert(find(Reg) != end() && "Reg uninitialized"); + return ((*this)[Reg] == &SystemZ::GR32BitRegClass || + (*this)[Reg] == &SystemZ::GRH32BitRegClass); + } + } Reg2RC(VirtReg); + + // Search all use/def connected instructions iteratively to propagate the + // requirements of registers being in either GR32 or GRH32. At the end, + // pass hints for VirtReg if either GR32 or GRH32 is the result. 
+ const TargetRegisterClass *const LOW = &SystemZ::GR32BitRegClass; + const TargetRegisterClass *const HIGH = &SystemZ::GRH32BitRegClass; + std::list Worklist; + SmallSet DoneRegs; Worklist.push_back(VirtReg); while (Worklist.size()) { - unsigned Reg = Worklist.pop_back_val(); + unsigned Reg = Worklist.front(); + Worklist.pop_front(); + if (DoneRegs.size() > MAXREGS) + break; if (!DoneRegs.insert(Reg).second) continue; - - for (auto &Use : MRI->use_instructions(Reg)) { - // For LOCRMux, see if the other operand is already a high or low - // register, and in that case give the correpsonding hints for - // VirtReg. LOCR instructions need both operands in either high or - // low parts. - if (Use.getOpcode() == SystemZ::LOCRMux) { - MachineOperand &TrueMO = Use.getOperand(1); - MachineOperand &FalseMO = Use.getOperand(2); - const TargetRegisterClass *RC = - TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), - getRC32(TrueMO, VRM, MRI)); - if (RC && RC != &SystemZ::GRX32BitRegClass) { - addHints(Order, Hints, RC, MRI); - // Return true to make these hints the only regs available to - // RA. This may mean extra spilling but since the alternative is - // a jump sequence expansion of the LOCRMux, it is preferred. - return true; + for (MachineInstr &MI : MRI->reg_nodbg_instructions(Reg)) { + bool Analyzed = false; + unsigned Regs[3] = {0, 0, 0}; + bool RegConstrained[3] = {false, false, false}; + bool HasHigh = false; + bool HasLow = false; + + // A function to look at the operands of MI before all else. + auto analyzeOperands = [&](unsigned NumOps) -> void { + assert(NumOps <= 3 && "Index out of range."); + for (unsigned OpI = 0; OpI < NumOps; ++OpI) { + MachineOperand &MO = MI.getOperand(OpI); + Regs[OpI] = MO.getReg(); + if (Reg2RC.find(Regs[OpI]) == Reg2RC.end()) + // Use VRM to deduce a regclass except for VirtReg (eviction). + Reg2RC[Regs[OpI]] = + getRC32(MO, (Regs[OpI] != VirtReg ? 
VRM : nullptr), MRI); + RegConstrained[OpI] = Reg2RC.isConstrained(Regs[OpI]); + if (Reg2RC[Regs[OpI]] == HIGH) + HasHigh = true; + else if (Reg2RC[Regs[OpI]] == LOW) + HasLow = true; + } + Analyzed = true; + }; + + // A function to add hints for all operands to make them all high or + // low if possible, to avoid High/Low combinations. If OnlyExtra is + // false, give hard hints. If OnlyExtra is true, give hard or soft + // hints only if one of the experimental CL options for this was + // given. + auto addHiOrLoHints = [&](bool OnlyExtra) -> void { + // (Experimental) + if (!HARDHIORLO && !SOFTHIORLO && OnlyExtra) + return; + bool HintHard = (OnlyExtra ? HARDHIORLO : true); + const TargetRegisterClass *RC = nullptr; + if (HasHigh && !HasLow) + RC = HIGH; + else if (HasLow && !HasHigh) + RC = LOW; + if (RC != nullptr) + for (unsigned Idx = 0; Idx < 3 && Regs[Idx]; ++Idx) + Reg2RC.constrainReg(Regs[Idx], RC, HintHard); + }; + + switch (MI.getOpcode()) { + case SystemZ::CRMux: + case SystemZ::CLRMux: + analyzeOperands(2); + // Avoid needing to rotate the LH case. + if (isCCLiveOut(*MI.getParent())) { + // L_ -> LL + if (Reg2RC[Regs[0]] == LOW) { + Reg2RC.constrainReg(Regs[1], LOW); + break; + } + // _H -> HH + else if (Reg2RC[Regs[1]] == HIGH) { + Reg2RC.constrainReg(Regs[0], HIGH); + break; + } + } + addHiOrLoHints(true/*OnlyExtra*/); + break; + + case SystemZ::SRMuxK: + case SystemZ::SLRMuxK: + analyzeOperands(3); + // Avoid commutation with negation. + // H__ -> HH_ + if (Reg2RC[Regs[0]] == HIGH) { + Reg2RC.constrainReg(Regs[1], HIGH); + break; + } + // _L_ -> LLL + if (Reg2RC[Regs[1]] == LOW) { + Reg2RC.constrainReg(Regs[0], LOW); + Reg2RC.constrainReg(Regs[2], LOW); + break; + } + LLVM_FALLTHROUGH; + case SystemZ::ARMuxK: + case SystemZ::ALRMuxK: + if (!Analyzed) // SRMux/SLRMux done above. 
+ analyzeOperands(3); + + // L__ -> LLL + if (Reg2RC[Regs[0]] == LOW) { + Reg2RC.constrainReg(Regs[1], LOW); + Reg2RC.constrainReg(Regs[2], LOW); + break; + } + // _LL -> LLL + else if (Reg2RC[Regs[1]] == LOW && Reg2RC[Regs[2]] == LOW) { + Reg2RC.constrainReg(Regs[0], LOW); + break; + } + else if (Reg2RC[Regs[0]] == HIGH) { + // HL_ -> HLH + if (Reg2RC[Regs[1]] == LOW) { + Reg2RC.constrainReg(Regs[2], HIGH); + break; + } + // H_L -> HHL + else if (Reg2RC[Regs[2]] == LOW) { + Reg2RC.constrainReg(Regs[1], HIGH); + break; + } + // Hrr -> HHH + else if (Regs[1] == Regs[2]) { + Reg2RC.constrainReg(Regs[1], HIGH); + break; + } + } + // _H? -> HH? + else if (Reg2RC[Regs[1]] == HIGH || Reg2RC[Regs[2]] == HIGH) { + Reg2RC.constrainReg(Regs[0], HIGH); + break; } - // Add the other operand of the LOCRMux to the worklist. - unsigned OtherReg = - (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); - if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) - Worklist.push_back(OtherReg); - } // end LOCRMux - else if (Use.getOpcode() == SystemZ::CHIMux || - Use.getOpcode() == SystemZ::CFIMux) { - if (Use.getOperand(1).getImm() == 0) { + addHiOrLoHints(true/*OnlyExtra*/); + break; + + case SystemZ::LOCRMux: + analyzeOperands(3); + addHiOrLoHints(false/*OnlyExtra*/); + break; + + case SystemZ::LLCMux: + case SystemZ::LLHMux: + analyzeOperands(1); + Reg2RC.constrainReg(Regs[0], LOW, HARD_LL_CH_HINTS/*HardHint*/); + break; + + case SystemZ::CHIMux: + case SystemZ::CFIMux: + analyzeOperands(1); + if (MI.getOperand(1).getImm() == 0) { bool OnlyLMuxes = true; - for (MachineInstr &DefMI : MRI->def_instructions(VirtReg)) + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) if (DefMI.getOpcode() != SystemZ::LMux) OnlyLMuxes = false; - if (OnlyLMuxes) { - addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI); - // Return false to make these hints preferred but not obligatory. 
- return false; - } + if (OnlyLMuxes) + Reg2RC.constrainReg(Regs[0], LOW, HARD_LT_HINTS/*HardHint*/); } - } // end CHIMux / CFIMux - } + break; + + default: break; + } + if (!Analyzed) + continue; + // Add operands to worklist as useful. + bool RestartReg = false; + for (unsigned Idx = 0; Idx < 3 && Regs[Idx]; ++Idx) { + if (!RegConstrained[Idx] && Reg2RC.isConstrained(Regs[Idx])) { + // Revisit immediately any reg that now became constrained. + Worklist.push_front(Regs[Idx]); + DoneRegs.erase(Regs[Idx]); + RestartReg |= (Regs[Idx] == Reg); + } else if (Reg2RC[Regs[Idx]] == &SystemZ::GRX32BitRegClass) + // Add GRX32 registers to back of worklist. + Worklist.push_back(Regs[Idx]); + } + if (RestartReg) + break; // Reg was constrained and will be revisited. + } // MRI->reg_nodbg_instructions(Reg) + } // Worklist + + const TargetRegisterClass *ResultRC = Reg2RC[VirtReg]; + if (ResultRC == nullptr) + return BaseImplRetVal; // Conflicting hard hints. + if (ResultRC != &SystemZ::GRX32BitRegClass) { + assert ((ResultRC == LOW || ResultRC == HIGH) && "Bad Regclass."); + addHints(Order, Hints, ResultRC, MRI); + return true; } - } + if (Reg2RC.SoftHintRC != nullptr) { + assert ((Reg2RC.SoftHintRC == LOW || Reg2RC.SoftHintRC == HIGH) && + "Bad Regclass."); + addHints(Order, Hints, Reg2RC.SoftHintRC, MRI); + return false; + } + } // End GRX32BitRegClass - if (VRM == nullptr) - return BaseImplRetVal; + return BaseImplRetVal; +} - // Add any two address hints after any copy hints. 
- SmallSet TwoAddrHints; - for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) - if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { - const MachineOperand *VRRegMO = nullptr; - const MachineOperand *OtherMO = nullptr; - const MachineOperand *CommuMO = nullptr; - if (VirtReg == Use.getOperand(0).getReg()) { - VRRegMO = &Use.getOperand(0); - OtherMO = &Use.getOperand(1); - if (Use.isCommutable()) - CommuMO = &Use.getOperand(2); - } else if (VirtReg == Use.getOperand(1).getReg()) { - VRRegMO = &Use.getOperand(1); - OtherMO = &Use.getOperand(0); - } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) { - VRRegMO = &Use.getOperand(2); - OtherMO = &Use.getOperand(0); - } else - continue; +bool SystemZRegisterInfo::allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap &VRM) const { + if (!SystemZ::GRX32BitRegClass.contains(NewPhysReg)) + return true; + unsigned CurrPhys = VRM.getPhys(VReg); + assert(SystemZ::GRX32BitRegClass.contains(CurrPhys) && "Expected GRX32 reg."); + if (SystemZ::GR32BitRegClass.contains(CurrPhys) == + SystemZ::GR32BitRegClass.contains(NewPhysReg)) + return true; - auto tryAddHint = [&](const MachineOperand *MO) -> void { - unsigned Reg = MO->getReg(); - unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); - if (PhysReg) { - if (MO->getSubReg()) - PhysReg = getSubReg(PhysReg, MO->getSubReg()); - if (VRRegMO->getSubReg()) - PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), - MRI->getRegClass(VirtReg)); - if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) - TwoAddrHints.insert(PhysReg); - } - }; - tryAddHint(OtherMO); - if (CommuMO) - tryAddHint(CommuMO); + // This is a GRX32 register that has changed between low and high parts. + // Assume that the hard regalloc hints passed are successful in avoiding + // expensive fallbacks for these instructions, and do not allow the register + // classes to change.
+ MachineRegisterInfo *MRI = &VRM.getRegInfo(); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(VReg)) + switch (MI.getOpcode()) { + case SystemZ::CRMux: + case SystemZ::CLRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMuxK: + case SystemZ::ARMuxK: + case SystemZ::ALRMuxK: + case SystemZ::LOCRMux: + LLVM_DEBUG(dbgs() << "SystemZ: Avoiding replacing " + << getName(CurrPhys) << " with " + << getName(NewPhysReg) << " in "; MI.dump()); + return false; + default: break; } - for (MCPhysReg OrderReg : Order) - if (TwoAddrHints.count(OrderReg)) - Hints.push_back(OrderReg); - return BaseImplRetVal; + return true; } const MCPhysReg * Index: lib/Target/SystemZ/SystemZScheduleZ13.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ13.td +++ lib/Target/SystemZ/SystemZScheduleZ13.td @@ -366,6 +366,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -397,6 +398,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -547,6 +549,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, 
NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ14.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ14.td +++ lib/Target/SystemZ/SystemZScheduleZ14.td @@ -367,6 +367,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -398,6 +399,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -557,6 +559,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ196.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ196.td +++ lib/Target/SystemZ/SystemZScheduleZ196.td @@ -326,6 +326,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : 
InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -359,6 +360,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -508,6 +510,7 @@ def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], Index: lib/Target/SystemZ/SystemZScheduleZEC12.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZEC12.td +++ lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -337,6 +337,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -370,6 +371,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -519,6 +521,7 @@ def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex
"C(L)?HHR$")>; def : InstRW<[WLat2, FXU, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZTargetMachine.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetMachine.cpp +++ lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -223,8 +223,6 @@ if (getOptLevel() == CodeGenOpt::None) addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); - addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); - if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); } Index: test/CodeGen/SystemZ/debuginstr-00.mir =================================================================== --- test/CodeGen/SystemZ/debuginstr-00.mir +++ test/CodeGen/SystemZ/debuginstr-00.mir @@ -63,7 +63,7 @@ bb.1.bb2: liveins: $r0l - CHIMux renamable $r0l, 0, implicit-def $cc + CHI renamable $r0l, 0, implicit-def $cc BRC 14, 6, %bb.1, implicit killed $cc bb.2.bb3.preheader: Index: test/CodeGen/SystemZ/expand-mux-pseudos.mir =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/expand-mux-pseudos.mir @@ -0,0 +1,521 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -start-after=virtregrewriter \ +# RUN: -verify-machineinstrs -enable-post-misched=false -disable-branch-fold \ +# RUN: %s -o - | FileCheck %s +# +# Test that SystemZPostRewrite can handle all combinations of high/low +# registers and transform all Add/Sub/Compare GRX32 Mux instructions into legal +# target instructions. + +# Addition and subtraction. +name: fun0 +body: | + bb.0: + successors: %bb.1 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun0 + ; CHECK: iihf %r2, 0 + + ; Test all combinations of high/low registers as well as same/different + ; 64-bit registers.
+ + ;; ARMux / ALRMux + ; LLL + renamable $r0l = ARMux renamable $r1l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: ark %r0, %r1, %r2 + + renamable $r0l = ALRMux renamable $r0l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: alr %r0, %r2 + + renamable $r0l = ARMux renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ar %r0, %r1 + + renamable $r0l = ALRMux renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: alrk %r0, %r1, %r1 + + renamable $r0l = ARMux renamable $r0l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ar %r0, %r0 + + ; LLH + renamable $r0l = ALRMux renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r2, 0, 159, 32 + ; CHECK-NEXT: alr %r0, %r1 + + renamable $r0l = ARMux renamable $r0l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: ahhhr %r0, %r0, %r2 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = ALRMux renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: alr %r0, %r1 + + renamable $r0l = ARMux renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: ar %r0, %r1 + + renamable $r0l = ALRMux renamable $r0l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: alhhlr %r0, %r0, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + ; LHL + renamable $r0l = ARMux renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: ar %r0, %r2 + + renamable $r0l = ALRMux renamable $r0h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: alr %r0, %r2 + + renamable $r0l = ARMux renamable $r1h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: ahhhr %r0, %r1, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = ALRMux renamable $r1h, renamable $r1l, 
implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: alr %r0, %r1 + + renamable $r0l = ARMux renamable $r0h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: ahhlr %r0, %r0, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + ; LHH + renamable $r0l = ALRMux renamable $r1h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: alhhhr %r0, %r1, %r2 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = ARMux renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r0, %r0, %r2 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = ALRMux renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: alhhlr %r0, %r1, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = ARMux renamable $r1h, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: ar %r0, %r0 + + renamable $r0l = ALRMux renamable $r0h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: alr %r0, %r0 + + ; HLL + renamable $r0h = ARMux renamable $r1l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhlr %r0, %r0, %r2 + + renamable $r0h = ALRMux renamable $r0l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: alhhlr %r0, %r0, %r2 + + renamable $r0h = ARMux renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhlr %r0, %r0, %r0 + + renamable $r0h = ALRMux renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: alhhhr %r0, %r0, %r0 + + renamable $r0h = ARMux renamable $r0l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r0, %r0, %r0 + + ; HLH + 
renamable $r0h = ALRMux renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: alhhlr %r0, %r2, %r1 + + renamable $r0h = ARMux renamable $r0l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r0, %r2, %r0 + + renamable $r0h = ALRMux renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: alhhlr %r0, %r0, %r1 + + renamable $r0h = ARMux renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r0, %r1, %r1 + + renamable $r0h = ALRMux renamable $r0l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: alhhlr %r0, %r0, %r0 + + ; HHL + renamable $r0h = ARMux renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r0, %r1, %r2 + + renamable $r0h = ALRMux renamable $r0h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: alhhlr %r0, %r0, %r2 + + renamable $r0h = ARMux renamable $r1h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r0, %r1, %r0 + + renamable $r0h = ALRMux renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: alhhlr %r0, %r1, %r1 + + renamable $r0h = ARMux renamable $r0h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r0, %r0, %r0 + + ; HHH + renamable $r0h = ALRMux renamable $r1h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: alhhhr %r0, %r1, %r2 + + renamable $r0h = ARMux renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhhr %r0, %r0, %r2 + + renamable $r0h = ALRMux renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: alhhhr %r0, %r1, %r0 + + renamable $r0h = ARMux renamable $r1h, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: ahhhr %r0, %r1, %r1 + + renamable $r0h = ALRMux renamable $r0h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: alhhhr %r0, %r0, %r0 + + ;; SRMux / SLRMux + ; LLL + renamable $r0l = SRMux renamable $r1l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: srk %r0, %r1, %r2 + + renamable $r0l = SLRMux renamable $r0l, renamable $r2l, implicit-def 
dead $cc + ; CHECK-NEXT: slr %r0, %r2 + + renamable $r0l = SRMux renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: srk %r0, %r1, %r0 + + renamable $r0l = SLRMux renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: slrk %r0, %r1, %r1 + + renamable $r0l = SRMux renamable $r0l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: sr %r0, %r0 + + ; LLH + renamable $r0l = SLRMux renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r2, 0, 159, 32 + ; CHECK-NEXT: slrk %r0, %r1, %r0 + + renamable $r0l = SRMux renamable $r0l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: shhhr %r0, %r0, %r2 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = SLRMux renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: slrk %r0, %r1, %r0 + + renamable $r0l = SRMux renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: srk %r0, %r1, %r0 + + renamable $r0l = SLRMux renamable $r0l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: slhhlr %r0, %r0, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + ; LHL + renamable $r0l = SRMux renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: sr %r0, %r2 + + renamable $r0l = SLRMux renamable $r0h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: slr %r0, %r2 + + renamable $r0l = SRMux renamable $r1h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: shhhr %r0, %r1, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = SLRMux renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: slr %r0, %r1 + + renamable $r0l = SRMux renamable $r0h, renamable $r0l, implicit-def dead $cc ; r0l = r0h - r0l: r0l must be negated with lcr (lnr would yield -|r0l|) + ; CHECK-NEXT: lcr
%r0, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: ahhlr %r0, %r0, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + ; LHH + renamable $r0l = SLRMux renamable $r1h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: slhhhr %r0, %r1, %r2 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = SRMux renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: shhhr %r0, %r0, %r2 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = SLRMux renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: slhhlr %r0, %r1, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0l = SRMux renamable $r1h, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: sr %r0, %r0 + + renamable $r0l = SLRMux renamable $r0h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: slr %r0, %r0 + + ; HLL + renamable $r0h = SRMux renamable $r1l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: shhlr %r0, %r0, %r2 + + renamable $r0h = SLRMux renamable $r0l, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: slhhlr %r0, %r0, %r2 + + renamable $r0h = SRMux renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: shhlr %r0, %r0, %r0 + + renamable $r0h = SLRMux renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: slhhhr %r0, %r0, %r0 + + renamable $r0h = SRMux renamable $r0l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: shhhr %r0, %r0, %r0 + + ; HLH + renamable $r0h = SLRMux renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: slhhhr %r0, %r0, %r2 + + renamable 
$r0h = SRMux renamable $r0l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r0, 0, 159, 32 + ; CHECK-NEXT: shhhr %r0, %r0, %r2 + + renamable $r0h = SLRMux renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r0, %r0, 32 + ; CHECK-NEXT: slrk %r0, %r1, %r0 + ; CHECK-NEXT: rllg %r0, %r0, 32 + + renamable $r0h = SRMux renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r0, %r1, 0, 159, 32 + ; CHECK-NEXT: shhhr %r0, %r0, %r1 + + renamable $r0h = SLRMux renamable $r0l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: xihf %r0, 4294967295 + ; CHECK-NEXT: aih %r0, 1 + ; CHECK-NEXT: alhhlr %r0, %r0, %r0 + + ; HHL + renamable $r0h = SRMux renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: shhlr %r0, %r1, %r2 + + renamable $r0h = SLRMux renamable $r0h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: slhhlr %r0, %r0, %r2 + + renamable $r0h = SRMux renamable $r1h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: shhlr %r0, %r1, %r0 + + renamable $r0h = SLRMux renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: slhhlr %r0, %r1, %r1 + + renamable $r0h = SRMux renamable $r0h, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: shhlr %r0, %r0, %r0 + + ; HHH + renamable $r0h = SLRMux renamable $r1h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: slhhhr %r0, %r1, %r2 + + renamable $r0h = SRMux renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: shhhr %r0, %r0, %r2 + + renamable $r0h = SLRMux renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: slhhhr %r0, %r1, %r0 + + renamable $r0h = SRMux renamable $r1h, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: shhhr %r0, %r1, %r1 + + renamable $r0h = SLRMux renamable $r0h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: slhhhr %r0, %r0, %r0 + + + bb.1: + successors: + +... + + +# Comparisons. 
+--- +name: fun1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun1 + ; CHECK: iihf %r2, 0 + + ; LL + CRMux renamable $r1l, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.2, implicit killed $cc + ; CHECK-NEXT: crjl %r1, %r2, .LBB1_2 + + bb.1: + successors: + $r0l = LHI 0 + + bb.2: + successors: %bb.3, %bb.4 + liveins: $r1h, $r2l + + ; HL + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.4, implicit killed $cc + ; CHECK-LABEL: .LBB1_2: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_4 + + bb.3: + successors: + $r0l = LHI 1 + + bb.4: + successors: %bb.5, %bb.6 + liveins: $r1h, $r2h + + ; HH + CRMux renamable $r1h, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.6, implicit killed $cc + ; CHECK-LABEL: .LBB1_4: + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_6 + + bb.5: + successors: + $r0l = LHI 2 + + bb.6: + successors: %bb.7, %bb.8 + liveins: $r1l, $r2h + + ; LH (local): swap operands and invert condition. + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.8, implicit killed $cc + ; CHECK-LABEL: .LBB1_6: + ; CHECK-NEXT: chlr %r2, %r1 + ; CHECK-NEXT: jh .LBB1_8 + + bb.7: + successors: + $r0l = LHI 3 + + bb.8: + successors: %bb.9, %bb.10 + liveins: $r1l, $r1h, $r2l + + ; Two users with different conditions, not inverted. + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.10, implicit killed $cc + ; CHECK-LABEL: .LBB1_8: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jl .LBB1_10 + + bb.9: + successors: + $r0l = LHI 4 + + bb.10: + successors: %bb.11, %bb.12 + liveins: $r1l, $r1h, $r2l + + ; Two users with different conditions, inverted. 
+ CRMux renamable $r2l, renamable $r1h, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.12, implicit killed $cc + ; CHECK-LABEL: .LBB1_10: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jh .LBB1_12 + + bb.11: + successors: + $r0l = LHI 5 + + bb.12: + +... + +--- +name: fun2 +tracksRegLiveness: true +body: | + bb.0: + $r1l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun2 + ; CHECK: llihl %r2, 0 + + ; LH (live-out / different GR64 regs): rotate + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: rllg %r1, %r1, 32 + + bb.1: + successors: %bb.2, %bb.3 + liveins: $cc + BRC 14, 4, %bb.3, implicit killed $cc + ; CHECK-NEXT: jl + + bb.2: + successors: + $r0l = LHI 0 + + bb.3: + +... + +--- +name: fun3 +tracksRegLiveness: true +body: | + bb.0: + $r1l = LHI 0 + $r1h = IIHF 0 + + ; CHECK-LABEL: fun3 + ; CHECK: iihf %r1, 0 + + ; LH (live-out / same GR64 regs): rotate + CRMux renamable $r1l, renamable $r1h, implicit-def $cc + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: chlr %r1, %r1 + ; CHECK-NEXT: rllg %r1, %r1, 32 + + bb.1: + successors: %bb.2, %bb.3 + liveins: $cc + BRC 14, 4, %bb.3, implicit killed $cc + ; CHECK-NEXT: jl + + bb.2: + successors: + $r0l = LHI 0 + + bb.3: + +...