Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" @@ -932,9 +933,12 @@ /// operand folded, otherwise NULL is returned. /// The new instruction is inserted before MI, and the client is responsible /// for removing the old instruction. + /// If VRM is passed, the assigned physregs can be inspected by target to + /// decide if an opcode is legal to use. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS = nullptr) const; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. @@ -1024,7 +1028,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const { + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const { return nullptr; } Index: include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- include/llvm/CodeGen/TargetPassConfig.h +++ include/llvm/CodeGen/TargetPassConfig.h @@ -386,6 +386,11 @@ return false; } + /// Add passes to be run immediately after virtual registers are rewritten + /// to physical registers. These passes may replace an MI with a new one, + /// but should preserve SlotIndexes while doing so. 
+ virtual void addPostRewrite() { } + /// This method may be implemented by targets that want to run passes after /// register allocation pass pipeline but before prolog-epilog insertion. virtual void addPostRegAlloc() { } Index: include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- include/llvm/CodeGen/TargetRegisterInfo.h +++ include/llvm/CodeGen/TargetRegisterInfo.h @@ -813,6 +813,23 @@ const LiveRegMatrix *Matrix = nullptr) const; + /// Let target reject replacing the previously allocated phys reg with \p + /// NewPhysReg, by returning false. This would typically happen where + /// target has taken care in getRegAllocationHints() to have only a subset + /// of the available registers made available and it is important not to + /// lose this restriction. A better solution would probably be to do like + /// in gcc and provide legal operand assignment combinations at the + /// instruction level. On SystemZ some 32 bit instructions can use either + /// high or low parts of 64 bit registers, but some operand combinations + /// are not possible. For instance, CRMux (Compare Register) can be lowered + /// to CR (LOW, LOW), CHHR (HIGH, HIGH) or CHLR (HIGH, LOW), but (LOW, HIGH) + /// is *not* supported. + virtual bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap &VRM) const { + return true; + } + /// A callback to allow target a chance to update register allocation hints /// when a register is "changed" (e.g. coalesced) to another register. /// e.g. On ARM, some virtual registers should target register pairs, Index: lib/CodeGen/InlineSpiller.cpp =================================================================== --- lib/CodeGen/InlineSpiller.cpp +++ lib/CodeGen/InlineSpiller.cpp @@ -837,7 +837,7 @@ MachineInstr *FoldMI = LoadMI ? 
TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); if (!FoldMI) return false; Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -2966,6 +2966,13 @@ LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); continue; } + + // Check with target for any special replacements to avoid. + if (!TRI->allowHintRecoloring(Reg, PhysReg, *VRM)) { + LLVM_DEBUG(dbgs() << "=> Not suitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is // equal, we consider this is profitable because it may expose // more recoloring opportunities. Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -522,7 +522,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS) const { + LiveIntervals *LIS, + VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; for (unsigned OpIdx : Ops) Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore @@ -568,7 +569,7 @@ MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); } if (NewMI) { Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -1168,6 +1168,12 @@ addPass(&MachineSchedulerID); if (addRegAssignmentOptimized()) { + // Allow targets to expand pseudo instructions depending on the choice of + // registers. 
If special requirements are involved (see comment for + // allowHintRecoloring()), it is important that this is done before + // MachineCopyPropagation. + addPostRewrite(); + // Copy propagate to forward register uses and try to eliminate COPYs that // were not coalesced. addPass(&MachineCopyPropagationID); Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -162,7 +162,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// \returns true if a branch from an instruction with opcode \p BranchOpc /// bytes is capable of jumping to a position \p BrOffset bytes away. Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3039,7 +3039,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. 
Consider this instruction: // // %0 = COPY %sp; GPR64all:%0 Index: lib/Target/SystemZ/CMakeLists.txt =================================================================== --- lib/Target/SystemZ/CMakeLists.txt +++ lib/Target/SystemZ/CMakeLists.txt @@ -17,7 +17,6 @@ SystemZCallingConv.cpp SystemZConstantPoolValue.cpp SystemZElimCompare.cpp - SystemZExpandPseudo.cpp SystemZFrameLowering.cpp SystemZHazardRecognizer.cpp SystemZISelDAGToDAG.cpp @@ -30,6 +29,7 @@ SystemZMCInstLower.cpp SystemZRegisterInfo.cpp SystemZSelectionDAGInfo.cpp + SystemZSelectMux.cpp SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp Index: lib/Target/SystemZ/SystemZ.h =================================================================== --- lib/Target/SystemZ/SystemZ.h +++ lib/Target/SystemZ/SystemZ.h @@ -190,10 +190,10 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); -FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZSelectMuxPass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm Index: lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- lib/Target/SystemZ/SystemZElimCompare.cpp +++ lib/Target/SystemZ/SystemZElimCompare.cpp @@ -103,14 +103,6 @@ } // end anonymous namespace -// Return true if CC is live out of MBB. -static bool isCCLiveOut(MachineBasicBlock &MBB) { - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; - return false; -} - // Returns true if MI is an instruction whose output equals the value in Reg. 
static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { switch (MI.getOpcode()) { @@ -598,7 +590,7 @@ // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. - bool CompleteCCUsers = !isCCLiveOut(MBB); + bool CompleteCCUsers = !SystemZRegisterInfo::isCCLiveOut(MBB); SmallVector CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { Index: lib/Target/SystemZ/SystemZExpandPseudo.cpp =================================================================== --- lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZSubtarget.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -using namespace llvm; - -#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" - -namespace llvm { - void initializeSystemZExpandPseudoPass(PassRegistry&); -} - -namespace { -class SystemZExpandPseudo : public MachineFunctionPass { -public: - static char ID; - SystemZExpandPseudo() : MachineFunctionPass(ID) { - initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry()); - } - - const SystemZInstrInfo *TII; - - bool runOnMachineFunction(MachineFunction &Fn) override; - - StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); - bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); -}; -char SystemZExpandPseudo::ID = 0; -} - -INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo", - SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) - -/// Returns an instance of the pseudo instruction expansion pass. -FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) { - return new SystemZExpandPseudo(); -} - -// MI is a load-register-on-condition pseudo instruction that could not be -// handled as a single hardware instruction. Replace it by a branch sequence. 
-bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineFunction &MF = *MBB.getParent(); - const BasicBlock *BB = MBB.getBasicBlock(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - unsigned CCValid = MI.getOperand(3).getImm(); - unsigned CCMask = MI.getOperand(4).getImm(); - - LivePhysRegs LiveRegs(TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); - - // Splice MBB at MI, moving the rest of the block into RestMBB. - MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); - MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); - RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); - RestMBB->transferSuccessors(&MBB); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - RestMBB->addLiveIn(*I); - - // Create a new block MoveMBB to hold the move instruction. - MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); - MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); - MoveMBB->addLiveIn(SrcReg); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MoveMBB->addLiveIn(*I); - - // At the end of MBB, create a conditional branch to RestMBB if the - // condition is false, otherwise fall through to MoveMBB. - BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) - .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); - MBB.addSuccessor(RestMBB); - MBB.addSuccessor(MoveMBB); - - // In MoveMBB, emit an instruction to move SrcReg into DestReg, - // then fall through to RestMBB. 
- TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, - MI.getOperand(2).isKill()); - MoveMBB->addSuccessor(RestMBB); - - NextMBBI = MBB.end(); - MI.eraseFromParent(); - return true; -} - -/// If MBBI references a pseudo instruction that should be expanded here, -/// do the expansion and return true. Otherwise return false. -bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { - MachineInstr &MI = *MBBI; - switch (MI.getOpcode()) { - case SystemZ::LOCRMux: - return expandLOCRMux(MBB, MBBI, NextMBBI); - default: - break; - } - return false; -} - -/// Iterate over the instructions in basic block MBB and expand any -/// pseudo instructions. Return true if anything was modified. -bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineBasicBlock::iterator NMBBI = std::next(MBBI); - Modified |= expandMI(MBB, MBBI, NMBBI); - MBBI = NMBBI; - } - - return Modified; -} - -bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = static_cast(MF.getSubtarget().getInstrInfo()); - - bool Modified = false; - for (auto &MBB : MF) - Modified |= expandMBB(MBB); - return Modified; -} - Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -4574,6 +4574,19 @@ let OpType = "reg"; } +// Like BinaryRRAndK, but expanded after RA depending on the choice of register. 
+multiclass BinaryRRAndKPseudo { + let NumOpsKey = key, Predicates = [FeatureHighWord] in { + let NumOpsValue = "3" in + def K : Pseudo<(outs cls1:$R1), (ins cls2:$R2, cls3:$R3), []>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1, + Constraints = "$R1 = $R1src" in + def "" : Pseudo<(outs cls1:$R1), (ins cls2:$R1src, cls3:$R3), + [(set cls1:$R1, (operator cls2:$R1src, cls3:$R3))]>; + } +} + // Like BinaryRI, but expanded after RA depending on the choice of register. class BinaryRIPseudo @@ -4621,6 +4634,15 @@ let AccessBytes = bytes; } +// Like CompareRRE, but expanded after RA depending on the choice of +// register. +class CompareRREPseudo + : Pseudo<(outs), (ins cls1:$R1, cls2:$R2), + [(set CC, (operator cls1:$R1, cls2:$R2))]> { + let isCompare = 1; +} + // Like TestBinarySIL, but expanded later. class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -141,6 +141,10 @@ } // end namespace SystemZII +namespace SystemZ { + LLVM_READONLY int getThreeOperandOpcode(uint16_t Opcode); +} + class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; SystemZSubtarget &STI; @@ -155,8 +159,6 @@ unsigned HighOpcode) const; void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; - void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -248,7 +250,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr 
*foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -46,7 +46,6 @@ #include "SystemZGenInstrInfo.inc" #define DEBUG_TYPE "systemz-II" -STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { @@ -198,29 +197,6 @@ MI.setDesc(get(Opcode)); } -// MI is a load-register-on-condition pseudo instruction. Replace it with -// LowOpcode if source and destination are both low GR32s and HighOpcode if -// source and destination are both high GR32s. -void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); - - if (!DestIsHigh && !SrcIsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && SrcIsHigh) - MI.setDesc(get(HighOpcode)); - else - LOCRMuxJumps++; - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. -} - // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. 
@@ -1060,8 +1036,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); unsigned Opcode = MI.getOpcode(); @@ -1214,9 +1191,69 @@ } } - // If the spilled operand is the final one, try to change R - // into . - int MemOpcode = SystemZ::getMemOpcode(Opcode); + // If the spilled operand is the final one, try to change R into + // . These Muxes may have a suitable memory instruction. It seems to + // work best to decide on a high/low opcode at this point and constrain the + // register class accordingly if needed. + int MemOpcode = -1; + const TargetRegisterClass *Op0RC32 = nullptr; + switch(Opcode) { + case SystemZ::ARMux: + case SystemZ::ALRMux: + case SystemZ::SRMux: + case SystemZ::SLRMux: + case SystemZ::CRMux: + case SystemZ::CLRMux: { + MachineOperand &MO = MI.getOperand(0); + // Only try this with a virtual register and a provided VRM. 
+ if (TRI->isVirtualRegister(MO.getReg()) && VRM != nullptr) + Op0RC32 = RI.getRC32(MO, VRM, &MRI); + break; + } + default: break; + } + const TargetRegisterClass *ConstrainedRC = nullptr; + if (Op0RC32 != nullptr) { + if (Op0RC32 != &SystemZ::GRH32BitRegClass) { + switch(Opcode) { + case SystemZ::ARMux: + MemOpcode = SystemZ::A; + break; + case SystemZ::ALRMux: + MemOpcode = SystemZ::AL; + break; + case SystemZ::SRMux: + MemOpcode = SystemZ::S; + break; + case SystemZ::SLRMux: + MemOpcode = SystemZ::SL; + break; + case SystemZ::CRMux: + MemOpcode = SystemZ::C; + break; + case SystemZ::CLRMux: + MemOpcode = SystemZ::CL; + break; + default: break; + } + assert(MemOpcode != -1 && "Should have a opcode for low-part by now."); + ConstrainedRC = &SystemZ::GR32BitRegClass; + } else { + switch(Opcode) { + case SystemZ::CRMux: + MemOpcode = SystemZ::CHF; + break; + case SystemZ::CLRMux: + MemOpcode = SystemZ::CLHF; + break; + default: break; + } + if (MemOpcode != -1) + ConstrainedRC = &SystemZ::GRH32BitRegClass; + } + } + if (MemOpcode == -1) + MemOpcode = SystemZ::getMemOpcode(Opcode); if (MemOpcode >= 0) { unsigned NumOps = MI.getNumExplicitOperands(); if (OpNum == NumOps - 1) { @@ -1233,6 +1270,11 @@ if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); transferDeadCC(&MI, MIB); + if (ConstrainedRC != nullptr) { + unsigned Reg = MIB->getOperand(0).getReg(); + if (MRI.getRegClass(Reg) == &SystemZ::GRX32BitRegClass) + MRI.constrainRegClass(Reg, ConstrainedRC); + } return MIB; } } @@ -1301,10 +1343,6 @@ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); return true; - case SystemZ::LOCRMux: - expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); - return true; - case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1385,14 +1423,6 @@ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); return true; - case SystemZ::CHIMux: - expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false); - return true; - - case SystemZ::CFIMux: - 
expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false); - return true; - case SystemZ::CLFIMux: expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false); return true; Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -890,6 +890,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of a register. let isCommutable = 1 in { + // Expands to AR(K), AHHHR or AHHLR, depending on the choice of register. + defm ARMux : BinaryRRAndKPseudo<"armux", z_sadd, GRX32, GRX32, GRX32>; defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_sadd, GR64, GR64>; } @@ -932,6 +934,8 @@ let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { + // Expands to ALR(K), ALHHHR or ALHHLR, depending on the choice of register. + defm ALRMux : BinaryRRAndKPseudo<"alrmux", z_uadd, GRX32, GRX32, GRX32>; defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uadd, GR64, GR64>; } @@ -991,6 +995,8 @@ // Subtraction producing a signed overflow flag. let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of a register. + // Expands to SR(K), SHHHR or SHHLR, depending on the choice of register. + defm SRMux : BinaryRRAndKPseudo<"srmux", z_ssub, GRX32, GRX32, GRX32>; defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssub, GR64, GR64>; @@ -1040,6 +1046,8 @@ // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. + // Expands to SLR(K), SLHHHR or SLHHLR, depending on the choice of register. 
+ defm SLRMux : BinaryRRAndKPseudo<"slrmux", z_usub, GRX32, GRX32, GRX32>; defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usub, GR64, GR64>; @@ -1425,7 +1433,10 @@ // some of the signed forms have COMPARE AND BRANCH equivalents whereas none // of the unsigned forms do. let Defs = [CC], CCValues = 0xE in { - // Comparison with a register. + // Comparison with a register. CRMux expands to CR, CHHR or CHLR, + // depending on the choice of register. + def CRMux : CompareRREPseudo, + Requires<[FeatureHighWord]>; def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>; def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>; @@ -1477,7 +1488,10 @@ // Unsigned comparisons. let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { - // Comparison with a register. + // Comparison with a register. CLRMux expands to CLR, CLHHR or CLHLR, + // depending on the choice of register. 
+ def CLRMux : CompareRREPseudo, + Requires<[FeatureHighWord]>; def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; Index: lib/Target/SystemZ/SystemZRegisterInfo.h =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.h +++ lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,12 +49,19 @@ const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + static bool isCCLiveOut(MachineBasicBlock &MBB); + const TargetRegisterClass* getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) const; bool getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap &VRM) const override; // Override TargetRegisterInfo.h. bool requiresRegisterScavenging(const MachineFunction &MF) const override { Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -21,16 +21,18 @@ #define GET_REGINFO_TARGET_DESC #include "SystemZGenRegisterInfo.inc" +#define DEBUG_TYPE "regalloc" + SystemZRegisterInfo::SystemZRegisterInfo() : SystemZGenRegisterInfo(SystemZ::R14D) {} // Given that MO is a GRX32 operand, return either GR32 or GRH32 if MO // somehow belongs in it. Otherwise, return GRX32. 
-static const TargetRegisterClass *getRC32(MachineOperand &MO, - const VirtRegMap *VRM, - const MachineRegisterInfo *MRI) { +const TargetRegisterClass* +SystemZRegisterInfo::getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) const { const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); - if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || MO.getSubReg() == SystemZ::subreg_l32 || MO.getSubReg() == SystemZ::subreg_hl32) @@ -49,7 +51,7 @@ return &SystemZ::GRH32BitRegClass; } - assert (RC == &SystemZ::GRX32BitRegClass); + assert(RC == &SystemZ::GRX32BitRegClass); return RC; } @@ -73,6 +75,20 @@ Hints.push_back(Reg); } +// EXPERIMENTAL +#include "llvm/Support/CommandLine.h" +static cl::opt HARDHIORLO("hard-hi-or-lo", cl::init(false)); +static cl::opt SOFTHIORLO("soft-hi-or-lo", cl::init(false)); +static cl::opt HARD_LT_HINTS("hard-lt-hints", cl::init(false)); +static cl::opt HARD_LL_CH_HINTS("hard-ll-ch-hints", cl::init(false)); + +bool SystemZRegisterInfo::isCCLiveOut(MachineBasicBlock &MBB) { + for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + bool SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, @@ -81,66 +97,253 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { + // A map from virtual registers to their register classes. 
+ struct Reg2RCMap : std::map { + unsigned VirtReg_; + bool Change; + const TargetRegisterClass *SoftHintRC; + Reg2RCMap(unsigned Reg) : VirtReg_(Reg) { reset(); } + void reset() { + Change = false; + SoftHintRC = nullptr; + } + void findRC(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI, + const SystemZRegisterInfo *TRI) { + if (find(MO.getReg()) != end()) + return; + const TargetRegisterClass *RC = TRI->getRC32(MO, VRM, MRI); + (*this)[MO.getReg()] = RC; + } + void constrainReg(unsigned Reg, const TargetRegisterClass *RC, + bool HardHint = true) { + assert((RC == &SystemZ::GR32BitRegClass || + RC == &SystemZ::GRH32BitRegClass) && + "Should constrain to either low or high parts."); + if (!HardHint) { + if (Reg == VirtReg_) + SoftHintRC = RC; + return; + } + if ((*this)[Reg] == &SystemZ::GRX32BitRegClass) { + (*this)[Reg] = RC; + Change = true; + } else if ((*this)[Reg] != nullptr && (*this)[Reg] != RC) { + // Don't hint either way if the opposite RC is also needed. + (*this)[Reg] = nullptr; + Change = true; + } + } + } Reg2RC(VirtReg); + // Search all use/def connected instructions iteratively to propagate the + // requirements of registers being in either GR32 or GRH32. At the end, + // pass hints for VirtReg if either GR32 or GRH32 is the result. + const TargetRegisterClass *LOW = &SystemZ::GR32BitRegClass; + const TargetRegisterClass *HIGH = &SystemZ::GRH32BitRegClass; SmallVector Worklist; SmallSet DoneRegs; - Worklist.push_back(VirtReg); - while (Worklist.size()) { - unsigned Reg = Worklist.pop_back_val(); - if (!DoneRegs.insert(Reg).second) - continue; - - for (auto &Use : MRI->use_instructions(Reg)) { - // For LOCRMux, see if the other operand is already a high or low - // register, and in that case give the correpsonding hints for - // VirtReg. LOCR instructions need both operands in either high or - // low parts. 
- if (Use.getOpcode() == SystemZ::LOCRMux) { - MachineOperand &TrueMO = Use.getOperand(1); - MachineOperand &FalseMO = Use.getOperand(2); - const TargetRegisterClass *RC = - TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), - getRC32(TrueMO, VRM, MRI)); - if (RC && RC != &SystemZ::GRX32BitRegClass) { - addHints(Order, Hints, RC, MRI); - // Return true to make these hints the only regs available to - // RA. This may mean extra spilling but since the alternative is - // a jump sequence expansion of the LOCRMux, it is preferred. - return true; - } + Reg2RC.Change = true; + while (Reg2RC.Change) { + Reg2RC.reset(); + Worklist.clear(); + Worklist.push_back(VirtReg); + DoneRegs.clear(); + while (Worklist.size()) { + unsigned Reg = Worklist.pop_back_val(); + if (!DoneRegs.insert(Reg).second) + continue; + for (MachineInstr &MI : MRI->reg_nodbg_instructions(Reg)) { + unsigned Regs[3] = {0, 0, 0}; + bool HasHigh = false; + bool HasLow = false; + unsigned OpI = 0; + auto processOperands = [&](unsigned NumOps) -> void { + assert(NumOps <= 3 && "Index out of range."); + for (; OpI < NumOps; ++OpI) { + MachineOperand &MO = MI.getOperand(OpI); + Regs[OpI] = MO.getReg(); + Reg2RC.findRC(MO, VRM, MRI, this); + // Add GRX32 registers to worklist + if (Reg2RC[MO.getReg()] == &SystemZ::GRX32BitRegClass) + Worklist.push_back(MO.getReg()); + else if (Reg2RC[MO.getReg()] == HIGH) + HasHigh = true; + else + HasLow = true; + } + }; + // Add hints for all operands to make them all high or low if + // possible, to avoid the slower High/Low instructions. If + // OnlyExtra is false, give hard hints. If OnlyExtra is true, give + // hard or soft hints only if one of the experimental CL options + // for this was given. + auto addHiOrLoHints = [&](bool OnlyExtra) -> void { + if (!HARDHIORLO && !SOFTHIORLO && OnlyExtra) + return; + bool HintHard = (OnlyExtra ? 
HARDHIORLO : true); + const TargetRegisterClass *RC = nullptr; + if (HasHigh && !HasLow) + RC = HIGH; + else if (HasLow && !HasHigh) + RC = LOW; + if (RC != nullptr) + for (unsigned Idx = 0; Regs[Idx] && Idx < 3; ++Idx) + Reg2RC.constrainReg(Regs[Idx], RC, HintHard); + }; + + switch (MI.getOpcode()) { + case SystemZ::CRMux: + case SystemZ::CLRMux: + processOperands(2); + // Avoid needing to rotate the LH case. + if (isCCLiveOut(*MI.getParent())) { + // L_ -> LL + if (Reg2RC[Regs[0]] == LOW) + Reg2RC.constrainReg(Regs[1], LOW); + // _H -> HH + else if (Reg2RC[Regs[1]] == HIGH) + Reg2RC.constrainReg(Regs[0], HIGH); + } + addHiOrLoHints(true/*OnlyExtra*/); + break; - // Add the other operand of the LOCRMux to the worklist. - unsigned OtherReg = - (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); - if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) - Worklist.push_back(OtherReg); - } // end LOCRMux - else if (Use.getOpcode() == SystemZ::CHIMux || - Use.getOpcode() == SystemZ::CFIMux) { - if (Use.getOperand(1).getImm() == 0) { - bool OnlyLMuxes = true; - for (MachineInstr &DefMI : MRI->def_instructions(VirtReg)) - if (DefMI.getOpcode() != SystemZ::LMux) - OnlyLMuxes = false; - if (OnlyLMuxes) { - addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI); - // Return false to make these hints preferred but not obligatory. 
- return false; + case SystemZ::SRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + processOperands(3); + // H__ -> HH_ + if (Reg2RC[Regs[0]] == HIGH) { + Reg2RC.constrainReg(Regs[1], HIGH); + break; + } + // _L_ -> LLL + if (Reg2RC[Regs[1]] == LOW) { + Reg2RC.constrainReg(Regs[0], LOW); + Reg2RC.constrainReg(Regs[2], LOW); + break; + } + LLVM_FALLTHROUGH; + case SystemZ::ARMux: + case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + processOperands(3); + // L__ -> LLL + if (Reg2RC[Regs[0]] == LOW) { + Reg2RC.constrainReg(Regs[1], LOW); + Reg2RC.constrainReg(Regs[2], LOW); } + // _LL -> LLL + else if (Reg2RC[Regs[1]] == LOW && Reg2RC[Regs[2]] == LOW) + Reg2RC.constrainReg(Regs[0], LOW); + else if (Reg2RC[Regs[0]] == HIGH) { + // HL_ -> HLH + if (Reg2RC[Regs[1]] == LOW) + Reg2RC.constrainReg(Regs[2], HIGH); + // H_L -> HHL + else if (Reg2RC[Regs[2]] == LOW) + Reg2RC.constrainReg(Regs[1], HIGH); + // Hrr -> HHH + else if (Regs[1] == Regs[2]) + Reg2RC.constrainReg(Regs[1], HIGH); + } + // _H? -> HH? 
+ else if (Reg2RC[Regs[1]] == HIGH || Reg2RC[Regs[2]] == HIGH) + Reg2RC.constrainReg(Regs[0], HIGH); + addHiOrLoHints(true/*OnlyExtra*/); + break; + + case SystemZ::LOCRMux: + processOperands(3); + addHiOrLoHints(false/*OnlyExtra*/); + break; + + case SystemZ::LLCMux: + case SystemZ::LLHMux: + processOperands(1); + Reg2RC.constrainReg(Regs[0], LOW, HARD_LL_CH_HINTS/*HardHint*/); + break; + + case SystemZ::CHIMux: + case SystemZ::CFIMux: + processOperands(1); + if (MI.getOperand(1).getImm() == 0) { + bool OnlyLMuxes = true; + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) + if (DefMI.getOpcode() != SystemZ::LMux) + OnlyLMuxes = false; + if (OnlyLMuxes) + Reg2RC.constrainReg(Regs[0], LOW, HARD_LT_HINTS/*HardHint*/); + } + break; + + default: break; } - } // end CHIMux / CFIMux - } + } // MI + } // Worklist + } // Change + const TargetRegisterClass *ResultRC = Reg2RC[VirtReg]; + if (ResultRC == nullptr) + return BaseImplRetVal; + if (ResultRC != &SystemZ::GRX32BitRegClass) { + assert ((ResultRC == LOW || ResultRC == HIGH) && "Bad Regclass."); + addHints(Order, Hints, ResultRC, MRI); + return true; + } + if (Reg2RC.SoftHintRC != nullptr) { + assert ((Reg2RC.SoftHintRC == LOW || Reg2RC.SoftHintRC == HIGH) && + "Bad Regclass."); + addHints(Order, Hints, Reg2RC.SoftHintRC, MRI); + return false; } } return BaseImplRetVal; } +bool SystemZRegisterInfo::allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap &VRM) const { + if (!SystemZ::GRX32BitRegClass.contains(NewPhysReg)) + return true; + unsigned CurrPhys = VRM.getPhys(VReg); + assert(SystemZ::GRX32BitRegClass.contains(CurrPhys) && "Expected GRX32 reg."); + if (SystemZ::GR32BitRegClass.contains(CurrPhys) == + SystemZ::GR32BitRegClass.contains(NewPhysReg)) + return true; + + // This is a GRX32 register that has changed between low and high parts. 
+ MachineRegisterInfo *MRI = &VRM.getRegInfo(); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(VReg)) + switch (MI.getOpcode()) { + case SystemZ::LOCRMux: + case SystemZ::CRMux: + case SystemZ::CLRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMuxK: + case SystemZ::SRMux: + case SystemZ::SLRMux: + case SystemZ::ARMux: + case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + LLVM_DEBUG(dbgs() << "SystemZ: Avoiding replacing " + << getName(CurrPhys) << " with " + << getName(NewPhysReg) << " in "; MI.dump()); + return false; + default: break; + } + + return true; +} + const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const SystemZSubtarget &Subtarget = MF->getSubtarget(); Index: lib/Target/SystemZ/SystemZScheduleZ13.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ13.td +++ lib/Target/SystemZ/SystemZScheduleZ13.td @@ -366,6 +366,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -397,6 +398,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -547,6 +549,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, 
FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ14.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ14.td +++ lib/Target/SystemZ/SystemZScheduleZ14.td @@ -367,6 +367,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -398,6 +399,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -557,6 +559,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ196.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ196.td +++ lib/Target/SystemZ/SystemZScheduleZ196.td @@ -326,6 +326,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], 
(instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -359,6 +360,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -508,6 +510,7 @@ def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], Index: lib/Target/SystemZ/SystemZScheduleZEC12.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZEC12.td +++ lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -337,6 +337,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -370,6 +371,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -519,6 +521,7 @@ def : 
InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXU, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZSelectMux.cpp =================================================================== --- /dev/null +++ lib/Target/SystemZ/SystemZSelectMux.cpp @@ -0,0 +1,646 @@ +//==------- SystemZSelectMux.cpp - Select Mux instructions --------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that is run immediately after VirtRegRewriter +// and before MachineCopyPropagation. The purpose is to set the opcodes of +// GRX32 Mux pseudos to match the choice of registers which have been +// carefully hinted during register allocation. It is important to do this +// before any later pass might substitute a register for another without +// knowledge of the actual legal combinations of high/low register +// assignments. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/LiveStacks.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ Mux pseudo selection pass" + +#define DEBUG_TYPE "systemz-selectmux" +STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); +STATISTIC(LOCRs, "Number of LOCRs"); +STATISTIC(MUXLOWs, "Number of Muxes using only low parts"); +STATISTIC(MUXHIGHs, "Number of Muxes using only high parts"); +STATISTIC(MUXHILOs, "Number of Muxes using high and low parts"); +STATISTIC(BADMUXs, "Number of Muxes ending up in illegal registers"); +STATISTIC(MuxCopyDst, "Number of Mux pseudos needing a copy to dst."); +STATISTIC(MuxRotate2, "Number of Mux pseudos needing two rotates of a reg."); +STATISTIC(MuxCmpSwap, "Number of Mux pseudo compares swapped."); +STATISTIC(MuxCmpNonSwappable, + "Number of Mux pseudo compares not swappable."); +STATISTIC(MuxCmpCCLiveOut, "Number of Mux pseudo compares with live out CC."); + +namespace llvm { + void initializeSystemZSelectMuxPass(PassRegistry&); +} + +namespace { +class SystemZSelectMux : public MachineFunctionPass { +public: + static char ID; + SystemZSelectMux() : MachineFunctionPass(ID) { + initializeSystemZSelectMuxPass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // XXX Need to update any analysis e.g. if expanding LOCRMUXr? 
(And + // marking as preserved here) + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + + struct MuxInfo { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineInstr *const MI; + bool Rotated; + bool DstRegUsed; + + MuxInfo(MachineInstr &mi_); + unsigned getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); } + unsigned getReg64(unsigned Idx) const; + bool isHIGH(unsigned Idx) const { + assert(Idx < (MI->isCompare() ? 2 : 3) && "Operand index out of range."); + return SystemZ::GRH32BitRegClass.contains(getReg(Idx)); + } + bool isLOW(unsigned Idx) const { return !isHIGH(Idx); } + bool sameReg(unsigned A, unsigned B) const { return getReg(A) == getReg(B); } + bool differentRegs(unsigned A, unsigned B) const { return !sameReg(A, B); } + bool sameRegs64(unsigned A, unsigned B) const { + return getReg64(A) == getReg64(B); + } + bool differentRegs64(unsigned A, unsigned B) const { + return !sameRegs64(A, B); + } + + void useDstRegForOps(unsigned Idx); + void RotateReg0(); + void commute(); + bool trySelectAddSubMux(); + bool trySelectCmpMux(); + }; + + bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAddSubMux(MachineBasicBlock::iterator MBBI); + bool expandCmpMux(MachineBasicBlock::iterator MBBI); + bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool selectMBB(MachineBasicBlock &MBB); + bool updateLiveInLists_CC(MachineBasicBlock &MBB); +}; +char SystemZSelectMux::ID = 0; +} + +INITIALIZE_PASS(SystemZSelectMux, "systemz-select-mux", + SYSTEMZ_EXPAND_PSEUDO_NAME, false, false) + +/// Returns an instance of the Select Mux pass. 
+FunctionPass *llvm::createSystemZSelectMuxPass(SystemZTargetMachine &TM) { + return new SystemZSelectMux(); +} + +SystemZSelectMux::MuxInfo::MuxInfo(MachineInstr &mi_) : MI(&mi_) { + const TargetSubtargetInfo &Subtarget = + MI->getParent()->getParent()->getSubtarget(); + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + Rotated = false; + DstRegUsed = false; +} + +unsigned SystemZSelectMux::MuxInfo::getReg64(unsigned Idx) const { + unsigned Reg = getReg(Idx); + unsigned SubRegIdx = (SystemZ::GR32BitRegClass.contains(Reg) ? + SystemZ::subreg_l32 : SystemZ::subreg_h32); + unsigned Reg64 = TRI->getMatchingSuperReg(Reg, SubRegIdx, + &SystemZ::GR64BitRegClass); + assert(Reg64 && "Could not find the supreregister of a GRX32 register."); + return Reg64; +} + +// Copy the source register at Idx into the dst-reg of MI and use dst-reg +// instead in any operands that use the copied source register. +void SystemZSelectMux::MuxInfo::useDstRegForOps(unsigned Idx) { + assert(MI->getOperand(0).isDef() && "Expected a defined register to use."); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned ReplacedReg = getReg(Idx); + unsigned DstReg = getReg(0); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::COPY), DstReg) + .addReg(ReplacedReg); + for (unsigned OpIdx = 1; OpIdx < 3; ++OpIdx) { + MachineOperand &MO = MI->getOperand(OpIdx); + if (MO.getReg() == ReplacedReg) + MO.setReg(DstReg); + } + MuxCopyDst++; + DstRegUsed = true; +} + +void SystemZSelectMux::MuxInfo::RotateReg0() { + MachineBasicBlock::iterator InsPos = MI; + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg64 = getReg64(0); + if (!Rotated) { + // Rotate the 64-bit register. Mark it as undef and add an extra use + // operand for the 32-bit register that is known to be live. 
+ BuildMI(*MBB, InsPos, DL, TII->get(SystemZ::RLLG), Reg64) + .addReg(Reg64, getUndefRegState(true)) + .addReg(0).addImm(32) + .addReg(getReg(0)); + unsigned Reg = getReg(0); + unsigned OtherSubRegIdx = + isLOW(0) ? SystemZ::subreg_h32 : SystemZ::subreg_l32; + unsigned OtherReg = TRI->getSubReg(Reg64, OtherSubRegIdx); + unsigned NumOps = (MI->isCompare() ? 2 : 3); + for (unsigned Idx = 0; Idx < NumOps; ++Idx) { + MachineOperand &MO = MI->getOperand(Idx); + if (MO.getReg() == Reg) + MO.setReg(OtherReg); + else if (MO.getReg() == OtherReg) + MO.setReg(Reg); + } + Rotated = true; + } else { + // Rotate the 64-bit register back after MI. + BuildMI(*MBB, ++InsPos, DL, TII->get(SystemZ::RLLG), Reg64) + .addReg(Reg64) + .addReg(0).addImm(32); + } +} + +void SystemZSelectMux::MuxInfo::commute() { + // If MI is a subtraction, negate RHS and convert to addition. + bool ConvertedToAddition = true; + switch(MI->getOpcode()) { + case SystemZ::SRMux: + case SystemZ::SRMuxK: + MI->setDesc(TII->get(SystemZ::ARMux)); + break; + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + MI->setDesc(TII->get(SystemZ::ALRMux)); + break; + default: + ConvertedToAddition = false; + break; + } + if (ConvertedToAddition) { + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = getReg(2); + if (isLOW(2)) { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LCR), Reg) + .addReg(Reg); + } else { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::XIHF), Reg) + .addReg(Reg) + .addImm(0xffffffff); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::AIH), Reg) + .addReg(Reg) + .addImm(1); + } + } + + TII->commuteInstruction(*MI, false/*NewMI*/, 1, 2); +} + +bool SystemZSelectMux::MuxInfo::trySelectAddSubMux() { + unsigned LowOpcode, HiOpcode, HiHiLoOpcode; + unsigned Opc = MI->getOpcode(); + if (Opc == SystemZ::ARMux || Opc == SystemZ::ARMuxK) { + LowOpcode = SystemZ::AR; + HiOpcode = SystemZ::AHHHR; + HiHiLoOpcode = SystemZ::AHHLR; + } + else if (Opc == SystemZ::SRMux || Opc == 
SystemZ::SRMuxK) { + LowOpcode = SystemZ::SR; + HiOpcode = SystemZ::SHHHR; + HiHiLoOpcode = SystemZ::SHHLR; + } + else if (Opc == SystemZ::ALRMux || Opc == SystemZ::ALRMuxK) { + LowOpcode = SystemZ::ALR; + HiOpcode = SystemZ::ALHHHR; + HiHiLoOpcode = SystemZ::ALHHLR; + } + else if (Opc == SystemZ::SLRMux || Opc == SystemZ::SLRMuxK) { + LowOpcode = SystemZ::SLR; + HiOpcode = SystemZ::SLHHHR; + HiHiLoOpcode = SystemZ::SLHHLR; + } + else + llvm_unreachable("Bad opcode."); + + // Try to find an opcode to use. + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1) && isLOW(2)) + Opcode = (getReg(0) == getReg(1) ? + LowOpcode : SystemZ::getThreeOperandOpcode(LowOpcode)); + else if (isHIGH(0) && isHIGH(1) && isHIGH(2)) + Opcode = HiOpcode; + else if (isHIGH(0) && isHIGH(1) && isLOW(2)) + Opcode = HiHiLoOpcode; + else + return false; + MI->setDesc(TII->get(Opcode)); + + // Make sure that the tied-to flag is correctly set or unset. + bool IsTied = MI->getOperand(1).isTied(); + bool ShouldBeTied = + (MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0); + if (IsTied && !ShouldBeTied) + MI->untieRegOperand(1); + else if (!IsTied && ShouldBeTied) + MI->tieOperands(0, 1); + + return true; +} + +bool SystemZSelectMux::MuxInfo::trySelectCmpMux() { + unsigned LowOpcode, HiOpcode, HiLowOpcode; + if (MI->getOpcode() == SystemZ::CRMux) { + LowOpcode = SystemZ::CR; + HiOpcode = SystemZ::CHHR; + HiLowOpcode = SystemZ::CHLR; + } else if (MI->getOpcode() == SystemZ::CLRMux) { + LowOpcode = SystemZ::CLR; + HiOpcode = SystemZ::CLHHR; + HiLowOpcode = SystemZ::CLHLR; + } else + llvm_unreachable("Bad opcode."); + + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1)) + Opcode = LowOpcode; + else if (isHIGH(0) && isHIGH(1)) + Opcode = HiOpcode; + else if (isHIGH(0) && isLOW(1)) + Opcode = HiLowOpcode; + else + return false; + MI->setDesc(TII->get(Opcode)); + return true; +} + +// MI is a load-register-on-condition pseudo instruction that could not be +// handled as a single hardware instruction. 
Replace it by a branch sequence. +bool SystemZSelectMux::expandLOCRMux(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineFunction &MF = *MBB.getParent(); + const BasicBlock *BB = MBB.getBasicBlock(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned CCValid = MI.getOperand(3).getImm(); + unsigned CCMask = MI.getOperand(4).getImm(); + + LivePhysRegs LiveRegs(TII->getRegisterInfo()); + LiveRegs.addLiveOuts(MBB); + for (auto I = std::prev(MBB.end()); I != MBBI; --I) + LiveRegs.stepBackward(*I); + + // Splice MBB at MI, moving the rest of the block into RestMBB. + MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); + RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); + RestMBB->transferSuccessors(&MBB); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + RestMBB->addLiveIn(*I); + + // Create a new block MoveMBB to hold the move instruction. + MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); + MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); + MoveMBB->addLiveIn(SrcReg); + for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) + MoveMBB->addLiveIn(*I); + + // At the end of MBB, create a conditional branch to RestMBB if the + // condition is false, otherwise fall through to MoveMBB. + BuildMI(&MBB, DL, TII->get(SystemZ::BRC)) + .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB); + MBB.addSuccessor(RestMBB); + MBB.addSuccessor(MoveMBB); + + // In MoveMBB, emit an instruction to move SrcReg into DestReg, + // then fall through to RestMBB. 
+ TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg, + MI.getOperand(2).isKill()); + MoveMBB->addSuccessor(RestMBB); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + return true; +} + +bool SystemZSelectMux::expandAddSubMux(MachineBasicBlock::iterator MBBI) { + MuxInfo MuxI(*MBBI); + bool IsAdd = MBBI->getDesc().isCommutable(); + if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isLOW(2)) + MUXLOWs++; + else if (MuxI.isHIGH(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2)) + MUXHIGHs++; + else if (MuxI.isHIGH(0) && (MuxI.isHIGH(1) || MuxI.isHIGH(2))) { + if (MuxI.isHIGH(2)) { // HLH is unsupported. + if (IsAdd) + MUXHILOs++; + else + BADMUXs++; + } else + MUXHILOs++; + } + else + BADMUXs++; + + // 1. See if legal already (after a free commute if needed). + if (IsAdd && MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.commute(); + if (MuxI.trySelectAddSubMux()) + return true; + // 2. Use destination register if not used by any source operand and if it + // would make operands legal. + if (MuxI.differentRegs(0, 1) && MuxI.differentRegs(0, 2)) { + if (MuxI.isLOW(0)) { + if (MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.useDstRegForOps(2); // LLH -> LLL + else if (MuxI.isHIGH(1) && (MuxI.isLOW(2) || MuxI.sameReg(1, 2))) + MuxI.useDstRegForOps(1); // LHL/LH0H0 -> LLL + } + else if (MuxI.isLOW(1)) + MuxI.useDstRegForOps(1); // HLX -> HHX + } + // 3. 
Handle the remaining cases + if (!MuxI.DstRegUsed) { + if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 1) && "Case with free dst reg already handled."); + MuxI.RotateReg0(); // LLH -> HHH + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isLOW(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + if (MuxI.sameRegs64(0, 1)) + MuxI.commute(); // LHL -> LLH + MuxI.RotateReg0(); // LHL/LLH -> HHX + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2)) { + MuxI.RotateReg0(); // LHH -> HHH/HLH/HHL/HLL + if (MuxI.sameRegs64(0, 1)) + MuxI.useDstRegForOps(1); // HLX -> HHH + } + else if (MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + assert(!IsAdd && "Add was handled earlier."); + if (MuxI.differentRegs64(0, 1)) + MuxI.RotateReg0(); // HLH -> LLL + else + MuxI.commute(); // HLH -> HHL + } + } + + bool Success = MuxI.trySelectAddSubMux(); + assert(Success && "Expected a legal register combination by now."); + if (MuxI.Rotated) { + MuxI.RotateReg0(); + MuxRotate2++; + } + + return true; +} + +bool SystemZSelectMux::expandCmpMux(MachineBasicBlock::iterator MBBI) { + MachineBasicBlock *MBB = MBBI->getParent(); + // MBBI is a 32 bit compare with register pseudo instruction. Replace it + // with one of the three possible opcodes. "LowHi" is not supported. + SystemZSelectMux::MuxInfo MuxI(*MBBI); + if (MuxI.isLOW(0) && MuxI.isLOW(1)) + MUXLOWs++; + else if (MuxI.isHIGH(0) && MuxI.isHIGH(1)) + MUXHIGHs++; + else if (MuxI.isHIGH(0) && MuxI.isLOW(1)) + MUXHILOs++; + else + BADMUXs++; + + // 1. See if legal already + if (MuxI.trySelectCmpMux()) + return true; + assert(MuxI.isLOW(0) && MuxI.isHIGH(1) && "Only Cmp Lo/Hi is illegal"); + + // 2. Try to swap the operands if possible. 
+ if (!SystemZRegisterInfo::isCCLiveOut(*MBB)) { + bool Swappable = true; + SmallVector CCUsers; + for (MachineBasicBlock::iterator Itr = std::next(MBBI); + Itr != MBB->end(); ++Itr) { + if (Itr->readsRegister(SystemZ::CC)) { + unsigned Flags = Itr->getDesc().TSFlags; + if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) + CCUsers.push_back(&*Itr); + else { + Swappable = false; + MuxCmpNonSwappable++; + break; + } + } + if (Itr->definesRegister(SystemZ::CC)) + break; + } + if (Swappable) { + assert(CCUsers.size() && "No CC users found?"); + // Update all CC users. + for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? + 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); + switch(CCMaskMO.getImm()) { + case SystemZ::CCMASK_CMP_LT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GT); + break; + case SystemZ::CCMASK_CMP_GT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LT); + break; + case SystemZ::CCMASK_CMP_LE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GE); + break; + case SystemZ::CCMASK_CMP_GE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LE); + break; + case SystemZ::CCMASK_CMP_EQ: + case SystemZ::CCMASK_CMP_NE: + break; + default: + llvm_unreachable("Unexpected CCMask value."); + break; + } + } + // The compare is not marked as commutable, so swap the registers and + // flags here. + MachineOperand &LHS = MuxI.MI->getOperand(0); + MachineOperand &RHS = MuxI.MI->getOperand(1); + MachineOperand Tmp = MachineOperand(LHS); + LHS.setReg(RHS.getReg()); + RHS.setReg(Tmp.getReg()); + LHS.setIsKill(RHS.isKill()); + RHS.setIsKill(Tmp.isKill()); + LHS.setIsUndef(RHS.isUndef()); + RHS.setIsUndef(Tmp.isUndef()); + + bool Success = MuxI.trySelectCmpMux(); + assert(Success && "Expected a legal register combination by now."); + MuxCmpSwap++; + return true; + } + } + else + MuxCmpCCLiveOut++; + + // 3. 
Use rotations. + MuxI.RotateReg0(); + bool Success = MuxI.trySelectCmpMux(); + assert(Success && "Expected a legal register combination by now."); + MuxI.RotateReg0(); + MuxRotate2++; + return true; +} + +/// If MBBI references a pseudo instruction that should be selected here, +/// do it and return true. Otherwise return false. +bool SystemZSelectMux::selectMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case SystemZ::LOCRMux: { + MuxInfo MuxI(MI); + if (MuxI.isLOW(0) && MuxI.isLOW(2)) + MI.setDesc(TII->get(SystemZ::LOCR)); + else if (MuxI.isHIGH(0) && MuxI.isHIGH(2)) + MI.setDesc(TII->get(SystemZ::LOCFHR)); + else { + expandLOCRMux(MBB, MBBI, NextMBBI); + LOCRMuxJumps++; + return true; + } + LOCRs++; + return true; + } + + case SystemZ::CHIMux: { + bool IsHigh = SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(IsHigh ? SystemZ::CIH :SystemZ::CHI)); + return true; + } + + case SystemZ::CFIMux: { + bool IsHigh = SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(IsHigh ? SystemZ::CIH :SystemZ::CFI)); + return true; + } + + case SystemZ::CRMux: + case SystemZ::CLRMux: + return expandCmpMux(MBBI); + + case SystemZ::ARMux: + case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + case SystemZ::SRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + return expandAddSubMux(MBBI); + + default: + break; + } + + return false; +} + +/// Iterate over the instructions in basic block MBB and select any +/// pseudo instructions. Return true if anything was modified. 
+bool SystemZSelectMux::selectMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= selectMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZSelectMux::updateLiveInLists_CC(MachineBasicBlock &MBB) { + if (MBB.isLiveIn(SystemZ::CC)) + return false; + bool Missing = false; + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + for (; MBBI != E; ++MBBI) { + if (MBBI->isDebugInstr()) + continue; + if (MBBI->readsRegister(SystemZ::CC)) { + Missing = true; + break; + } + if (MBBI->definesRegister(SystemZ::CC)) + break; + } + if (Missing || (MBBI == E && SystemZRegisterInfo::isCCLiveOut(MBB))) { + assert(&MBB != &*MBB.getParent()->begin() && + "CC can not be live-in in the entry block of the function."); + LLVM_DEBUG(dbgs() << "SystemZSelectMux: Adding missing CC to MBB " + << "live-in list!\n"); + MBB.addLiveIn(SystemZ::CC); + return true; + } + return false; +} + +bool SystemZSelectMux::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + TRI = MF.getSubtarget().getRegisterInfo(); + + // We must know about all CC users when expanding CRMux, so better make + // sure all live-in CCs are in the live-in lists. 
+ bool Change = true; + while (Change) { + Change = false; + for (auto &MBB : MF) + Change |= updateLiveInLists_CC(MBB); + } + + bool Modified = false; + for (auto &MBB : MF) + Modified |= selectMBB(MBB); + + return Modified; +} + Index: lib/Target/SystemZ/SystemZTargetMachine.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetMachine.cpp +++ lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -182,6 +182,7 @@ void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPostRewrite() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -211,8 +212,14 @@ return true; } +void SystemZPassConfig::addPostRewrite() { + addPass(createSystemZSelectMuxPass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPreSched2() { - addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); + // This is added in addPostRewrite() when RAGreedy is run (at -O1 and above). + if (getOptLevel() == CodeGenOpt::None) + addPass(createSystemZSelectMuxPass(getSystemZTargetMachine())); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -350,7 +350,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -4759,7 +4759,8 @@ 
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, LiveIntervals *LIS) const { + int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { // Check switch flag if (NoFusing) return nullptr; Index: test/CodeGen/SystemZ/debuginstr-00.mir =================================================================== --- test/CodeGen/SystemZ/debuginstr-00.mir +++ test/CodeGen/SystemZ/debuginstr-00.mir @@ -63,7 +63,7 @@ bb.1.bb2: liveins: $r0l - CHIMux renamable $r0l, 0, implicit-def $cc + CHI renamable $r0l, 0, implicit-def $cc BRC 14, 6, %bb.1, implicit killed $cc bb.2.bb3.preheader: Index: test/CodeGen/SystemZ/expand-mux-pseudos.mir =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/expand-mux-pseudos.mir @@ -0,0 +1,305 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -start-after=postrapseudos \ +# RUN: -verify-machineinstrs -enable-post-misched=false %s -o - | FileCheck %s +# +# Test that SystemZExpandPseudo can handle all combinations of high/low +# registers and transform all Add / Sub Mux instructions into legal target +# instructions. 
+ +name: fun0 +body: | + bb.0: + successors: %bb.1 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun0 + ; CHECK: llihl %r2, 0 + + ;; ARMuxK + + ; LLL + renamable $r2l = ARMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ark %r2, %r1, %r0 + renamable $r2l = ARMuxK renamable $r2l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ar %r2, %r0 + + ; LLH + renamable $r2l = ARMuxK renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ark %r2, %r1, %r2 + renamable $r2l = ARMuxK renamable $r2l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r1 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHL + renamable $r2l = ARMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ar %r2, %r1 + renamable $r2l = ARMuxK renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHH + renamable $r2l = ARMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r1, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: risbhg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r0h, renamable $r2h, implicit-def 
dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r0, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: ar %r2, %r2 + + ; HLL + renamable $r2h = ARMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r0 + renamable $r2h = ARMuxK renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r2 + + ; HLH + renamable $r2h = ARMuxK renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r0, %r1 + renamable $r2h = ARMuxK renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r2, %r1 + renamable $r2h = ARMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + + ; HHL + renamable $r2h = ARMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r1, %r1 + ; HHH + renamable $r2h = ARMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: ahhhr %r2, %r1, %r0 + + ;; SRMuxK + + ; LLL + renamable $r2l = SRMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: srk %r2, %r1, %r0 + renamable $r2l = SRMuxK renamable $r2l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: sr %r2, %r0 + + ; LLH + renamable $r2l = SRMuxK renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: srk %r2, %r1, %r2 + renamable $r2l = SRMuxK renamable $r2l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r1 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHL + 
renamable $r2l = SRMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: sr %r2, %r1 + renamable $r2l = SRMuxK renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: lnr %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHH + renamable $r2l = SRMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r1, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: risbhg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhlr %r2, %r0, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: sr %r2, %r2 + + ; HLL + renamable $r2h = SRMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhlr %r2, %r2, %r0 + renamable $r2h = SRMuxK renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r2 + + ; HLH + renamable $r2h = SRMuxK renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r0 + renamable $r2h = SRMuxK renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: srk %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable 
$r2h = SRMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: xihf %r2, 4294967295 + ; CHECK-NEXT: aih %r2, 1 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + + ; HHL + renamable $r2h = SRMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: shhlr %r2, %r1, %r1 + + ; HHH + renamable $r2h = SRMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: shhhr %r2, %r1, %r0 + + bb.1: + successors: + +... + +# Test comparisons. + +--- +name: fun1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun1 + ; CHECK: iihf %r2, 0 + + CRMux renamable $r1l, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.2, implicit killed $cc + ; CHECK-NEXT: crjl %r1, %r2, .LBB1_2 + + bb.1: + successors: + $r0l = LHI 0 + + bb.2: + successors: %bb.3, %bb.4 + liveins: $r1h, $r2l + + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.4, implicit killed $cc + ; CHECK-LABEL: .LBB1_2: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_4 + + bb.3: + successors: + $r0l = LHI 0 + + bb.4: + successors: %bb.5, %bb.6 + liveins: $r1h, $r2h + + CRMux renamable $r1h, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.6, implicit killed $cc + ; CHECK-LABEL: .LBB1_4: + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_6 + + bb.5: + successors: + $r0l = LHI 0 + + bb.6: + successors: %bb.7, %bb.8 + liveins: $r1l, $r2h + + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.8, implicit killed $cc + ; CHECK-LABEL: .LBB1_6: + ; CHECK-NEXT: chlr %r2, %r1 + ; CHECK-NEXT: jh .LBB1_8 + + bb.7: + successors: + $r0l = LHI 0 + + bb.8: + liveins: $r1l, $r2h + + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + ; CHECK-LABEL: .LBB1_8: + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: jl .LBB1_10 + + bb.9: + successors: %bb.10, 
%bb.11 + liveins: $cc + BRC 14, 4, %bb.11, implicit killed $cc + + bb.10: + successors: + $r0l = LHI 0 + + bb.11: + successors: %bb.12, %bb.13 + liveins: $r1l, $r1h, $r2l + + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.13, implicit killed $cc + ; CHECK-LABEL: .LBB1_10: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jl .LBB1_12 + + bb.12: + successors: + $r0l = LHI 0 + + bb.13: + successors: %bb.14, %bb.15 + liveins: $r1l, $r1h, $r2l + + CRMux renamable $r2l, renamable $r1h, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.15, implicit killed $cc + ; CHECK-LABEL: .LBB1_12: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jh .LBB1_14 + + bb.14: + successors: + $r0l = LHI 0 + + bb.15: + +...