Index: include/llvm/CodeGen/TargetPassConfig.h
===================================================================
--- include/llvm/CodeGen/TargetPassConfig.h
+++ include/llvm/CodeGen/TargetPassConfig.h
@@ -378,6 +378,11 @@
     return false;
   }
 
+  /// Add passes to be run immediately after virtual registers are rewritten
+  /// to physical registers. These passes may replace an MI with a new one,
+  /// but should preserve SlotIndexes while doing so.
+  virtual void addPostRewrite() { }
+
   /// This method may be implemented by targets that want to run passes after
   /// register allocation pass pipeline but before prolog-epilog insertion.
   virtual void addPostRegAlloc() { }
Index: include/llvm/CodeGen/TargetRegisterInfo.h
===================================================================
--- include/llvm/CodeGen/TargetRegisterInfo.h
+++ include/llvm/CodeGen/TargetRegisterInfo.h
@@ -813,6 +813,13 @@
                              const LiveRegMatrix *Matrix = nullptr) const;
 
+  /// Target veto for hint recoloring of VReg to NewPhysReg; default allows it.
+  virtual bool allowHintRecoloring(unsigned VReg,
+                                   unsigned NewPhysReg,
+                                   const VirtRegMap *VRM) const {
+    return true;
+  }
+
   /// A callback to allow target a chance to update register allocation hints
   /// when a register is "changed" (e.g. coalesced) to another register.
   /// e.g. On ARM, some virtual registers should target register pairs,
Index: lib/CodeGen/RegAllocGreedy.cpp
===================================================================
--- lib/CodeGen/RegAllocGreedy.cpp
+++ lib/CodeGen/RegAllocGreedy.cpp
@@ -2966,6 +2966,13 @@
       LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
       continue;
     }
+
+    // Check with target for any special replacements to avoid.
+    if (!TRI->allowHintRecoloring(Reg, PhysReg, VRM)) {
+      LLVM_DEBUG(dbgs() << "=> Not suitable.\n");
+      continue;
+    }
+
     // At this point, the cost is either cheaper or equal. If it is
     // equal, we consider this is profitable because it may expose
     // more recoloring opportunities.
Index: lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- lib/CodeGen/TargetPassConfig.cpp
+++ lib/CodeGen/TargetPassConfig.cpp
@@ -1154,6 +1154,10 @@
   // Finally rewrite virtual registers.
   addPass(&VirtRegRewriterID);
 
+  // Allow targets to expand pseudo instructions depending on the choice of
+  // registers.
+  addPostRewrite();
+
   // Perform stack slot coloring and post-ra machine LICM.
   //
   // FIXME: Re-enable coloring with register when it's capable of adding
Index: lib/Target/SystemZ/CMakeLists.txt
===================================================================
--- lib/Target/SystemZ/CMakeLists.txt
+++ lib/Target/SystemZ/CMakeLists.txt
@@ -30,6 +30,7 @@
   SystemZMCInstLower.cpp
   SystemZRegisterInfo.cpp
   SystemZSelectionDAGInfo.cpp
+  SystemZSelectMux.cpp
   SystemZShortenInst.cpp
   SystemZSubtarget.cpp
   SystemZTargetMachine.cpp
Index: lib/Target/SystemZ/SystemZ.h
===================================================================
--- lib/Target/SystemZ/SystemZ.h
+++ lib/Target/SystemZ/SystemZ.h
@@ -194,6 +194,7 @@
 FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZSelectMuxPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZTDCPass();
 } // end namespace llvm
Index: lib/Target/SystemZ/SystemZElimCompare.cpp
===================================================================
--- lib/Target/SystemZ/SystemZElimCompare.cpp
+++ lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -35,6 +35,7 @@
 #define DEBUG_TYPE "systemz-elim-compare"
 
 STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
+STATISTIC(LoadAndTests, "Number of load-and-test instructions");
 STATISTIC(LoadAndTraps, "Number of load-and-trap instructions");
 STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
 STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
@@ -302,6 +303,7 @@
   MIB.setMemRefs(MI.memoperands());
   MI.eraseFromParent();
 
+  LoadAndTests++;
   return true;
 }
 
Index: lib/Target/SystemZ/SystemZExpandPseudo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZExpandPseudo.cpp
+++ lib/Target/SystemZ/SystemZExpandPseudo.cpp
@@ -16,6 +16,7 @@
 #include "SystemZ.h"
 #include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -23,6 +24,14 @@
 
 #define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass"
 
+#define DEBUG_TYPE "systemz-expandpseudo"
+STATISTIC(MuxCopyDst, "Number of Mux pseudos needing a copy to dst.");
+STATISTIC(MuxRotate2, "Number of Mux pseudos needing two rotates of a reg.");
+STATISTIC(MuxCmpSwap, "Number of Mux pseudo compares swapped.");
+STATISTIC(MuxCmpNonUpdatable,
+          "Number of Mux pseudo compares not locally updatable.");
+STATISTIC(MuxCmpCCLiveOut, "Number of Mux pseudo compares with live out CC.");
+
 namespace llvm {
   void initializeSystemZExpandPseudoPass(PassRegistry&);
 }
@@ -36,6 +45,8 @@
   }
 
   const SystemZInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const SystemZSubtarget *Subtarget;
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
@@ -47,6 +58,8 @@
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      MachineBasicBlock::iterator &NextMBBI);
+  bool expandAddSubMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandCmpMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
 };
 char SystemZExpandPseudo::ID = 0;
 }
@@ -111,6 +124,187 @@
   return true;
 }
 
+// Expand an add/subtract Mux pseudo for the register combinations handled
+// below: either copy one source into the destination first (LLH, LHL, HLL,
+// HLH), or rotate the 64-bit destination around a high-word operation (LHH).
+// Any other combination is unexpected here.
+bool SystemZExpandPseudo::expandAddSubMux(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  unsigned Regs[3];
+  bool IsKill[3];
+  for (unsigned OpIdx = 0; OpIdx < 3; ++OpIdx) {
+    Regs[OpIdx] = MI.getOperand(OpIdx).getReg();
+    if (OpIdx > 0)
+      IsKill[OpIdx] = MI.getOperand(OpIdx).isKill();
+  }
+
+  unsigned LowOpcode, HiOpcode, HiHiLoOpcode;
+  TII->getAddSubMuxOpcodes(LowOpcode, HiOpcode, HiHiLoOpcode, MI.getOpcode());
+  unsigned CopyToDstOpc = 0;
+  unsigned SrcIdx = UINT_MAX;
+  if (SystemZ::GR32BitRegClass.contains(Regs[0]) && // LLH, LHL -> LLL
+      (SystemZ::GR32BitRegClass.contains(Regs[1]) !=
+       SystemZ::GR32BitRegClass.contains(Regs[2]))) {
+    CopyToDstOpc = LowOpcode;
+    SrcIdx = SystemZ::GRH32BitRegClass.contains(Regs[1]) ? 1 : 2;
+  } else if (SystemZ::GRH32BitRegClass.contains(Regs[0]) && // HLL -> HHL
+             SystemZ::GR32BitRegClass.contains(Regs[1])) {  // HLH -> HHH
+    SrcIdx = 1;
+    CopyToDstOpc = (SystemZ::GR32BitRegClass.contains(Regs[2]) ?
+                    HiHiLoOpcode : HiOpcode);
+  }
+  if (CopyToDstOpc) {
+    // The copy overwrites Regs[0] before the other source is read, so that
+    // source must not live in the destination register.
+    assert(Regs[0] != Regs[3 - SrcIdx] &&
+           "Copy to dst would clobber the other source operand.");
+    TII->copyPhysReg(MBB, MBBI, DL, Regs[0], Regs[SrcIdx], IsKill[SrcIdx]);
+    Regs[SrcIdx] = Regs[0];
+    IsKill[SrcIdx] = true;
+    BuildMI(MBB, MBBI, DL, TII->get(CopyToDstOpc), Regs[0])
+      .addReg(Regs[1], getKillRegState(IsKill[1]))
+      .addReg(Regs[2], getKillRegState(IsKill[2]));
+    MI.eraseFromParent();
+    MuxCopyDst++;
+    return true;
+  }
+
+  // LHH -> rotate; HHH; rotate
+  if (SystemZ::GR32BitRegClass.contains(Regs[0]) &&
+      SystemZ::GRH32BitRegClass.contains(Regs[1]) &&
+      SystemZ::GRH32BitRegClass.contains(Regs[2])) {
+    unsigned Dst64 = TRI->getMatchingSuperReg(Regs[0], SystemZ::subreg_l32,
+                                              &SystemZ::GR64BitRegClass);
+    unsigned RotateOpc = (Subtarget->hasMiscellaneousExtensions() ?
+                          SystemZ::RISBGN : SystemZ::RISBG);
+    // NOTE(review): the first rotate also replaces the high half of Dst64;
+    // this looks wrong if Regs[1] or Regs[2] is that register - confirm.
+    // Rotate
+    BuildMI(MBB, MBBI, DL, TII->get(RotateOpc), Dst64)
+      .addReg(Dst64).addReg(Dst64).addImm(0).addImm(63).addImm(32);
+    // HHH
+    BuildMI(MBB, MBBI, DL, TII->get(HiOpcode),
+            TRI->getSubReg(Dst64, SystemZ::subreg_h32))
+      .addReg(Regs[1], getKillRegState(IsKill[1]))
+      .addReg(Regs[2], getKillRegState(IsKill[2]));
+    // Rotate
+    BuildMI(MBB, MBBI, DL, TII->get(RotateOpc), Dst64)
+      .addReg(Dst64).addReg(Dst64).addImm(0).addImm(63).addImm(32);
+    MI.eraseFromParent();
+    MuxRotate2++;
+    return true;
+  }
+
+  // TODO: Currently MachineCopyProp may still change around registers of a
+  // Mux that was illegal after RegAlloc. Since that can lead to legal
+  // combinations, those should also be handled here.
+  llvm_unreachable("Unhandled case!");
+  return false;
+}
+
+// Return true if CC is live out of MBB.
+static bool isCCLiveOut(MachineBasicBlock &MBB) {
+  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
+    if ((*SI)->isLiveIn(SystemZ::CC))
+      return true;
+  return false;
+}
+
+// Expand a compare Mux pseudo with a low LHS and a high RHS. If every CC user
+// can be found and updated locally, emit the high/low compare with swapped
+// operands and reverse the users' CC masks; otherwise rotate the 64-bit LHS
+// register around a high/high compare.
+bool SystemZExpandPseudo::expandCmpMux(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &LHSOp = MI.getOperand(0);
+  MachineOperand &RHSOp = MI.getOperand(1);
+
+  if (!(SystemZ::GR32BitRegClass.contains(LHSOp.getReg()) &&
+        SystemZ::GRH32BitRegClass.contains(RHSOp.getReg()))) {
+    // TODO: Currently MachineCopyProp may still change around registers of a
+    // Mux that was illegal after RegAlloc. Since that can lead to legal
+    // combinations, those should also be handled here.
+    llvm_unreachable("Unhandled case!");
+    return false;
+  }
+
+  if (!isCCLiveOut(MBB)) {
+    bool Swappable = true;
+    SmallVector<MachineInstr *, 4> CCUsers;
+    for (MachineBasicBlock::iterator Itr = std::next(MBBI);
+         Itr != MBB.end(); ++Itr) {
+      if (Itr->readsRegister(SystemZ::CC)) {
+        unsigned Flags = Itr->getDesc().TSFlags;
+        if (!(Flags & SystemZII::CCMaskFirst) &&
+            !(Flags & SystemZII::CCMaskLast)) { // XXX Is this test correct?
+          Swappable = false;
+          MuxCmpNonUpdatable++;
+          break;
+        }
+        CCUsers.push_back(&*Itr);
+      }
+      if (Itr->definesRegister(SystemZ::CC))
+        break;
+    }
+
+    if (Swappable) {
+      assert(CCUsers.size() && "No CC users found?");
+      // Update all CC users. With the operands swapped, "less than" and
+      // "greater than" trade places; "equal" and "unordered" are unaffected.
+      for (MachineInstr *CCUser : CCUsers) {
+        unsigned Flags = CCUser->getDesc().TSFlags;
+        unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
+                               0 : CCUser->getNumExplicitOperands() - 2);
+        MachineOperand &MaskMO = CCUser->getOperand(FirstOpNum + 1);
+        unsigned CCMask = MaskMO.getImm();
+        unsigned NewCCMask =
+          CCMask & (SystemZ::CCMASK_CMP_EQ | SystemZ::CCMASK_CMP_UO);
+        if (CCMask & SystemZ::CCMASK_CMP_LT)
+          NewCCMask |= SystemZ::CCMASK_CMP_GT;
+        if (CCMask & SystemZ::CCMASK_CMP_GT)
+          NewCCMask |= SystemZ::CCMASK_CMP_LT;
+        MaskMO.setImm(NewCCMask);
+      }
+      // Swap the compare operands.
+      unsigned HiLowOpcode =
+        (MI.getOpcode() == SystemZ::CRMux ? SystemZ::CHLR : SystemZ::CLHLR);
+      BuildMI(MBB, MBBI, DL, TII->get(HiLowOpcode))
+        .add(RHSOp).add(LHSOp);
+      MI.eraseFromParent();
+      MuxCmpSwap++;
+      return true;
+    }
+  }
+  else
+    MuxCmpCCLiveOut++;
+
+  unsigned HiOpcode =
+    (MI.getOpcode() == SystemZ::CRMux ? SystemZ::CHHR : SystemZ::CLHHR);
+  unsigned Dst64 = TRI->getMatchingSuperReg(LHSOp.getReg(), SystemZ::subreg_l32,
+                                            &SystemZ::GR64BitRegClass);
+  unsigned RotateOpc = (Subtarget->hasMiscellaneousExtensions() ?
+                        SystemZ::RISBGN : SystemZ::RISBG);
+  // NOTE(review): the first rotate also replaces the high half of Dst64; this
+  // looks wrong if RHSOp is that register - confirm.
+  // Rotate
+  BuildMI(MBB, MBBI, DL, TII->get(RotateOpc), Dst64)
+    .addReg(Dst64).addReg(Dst64).addImm(0).addImm(63).addImm(32);
+  // Compare High / High
+  BuildMI(MBB, MBBI, DL, TII->get(HiOpcode))
+    .addReg(TRI->getSubReg(Dst64, SystemZ::subreg_h32))
+    .add(RHSOp);
+  // Rotate TODO: could be omitted if Dst64 is proven dead.
+  BuildMI(MBB, MBBI, DL, TII->get(RotateOpc), Dst64)
+    .addReg(Dst64).addReg(Dst64).addImm(0).addImm(63).addImm(32);
+  MI.eraseFromParent();
+  MuxRotate2++;
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
 bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -120,6 +314,21 @@
   switch (MI.getOpcode()) {
   case SystemZ::LOCRMux:
     return expandLOCRMux(MBB, MBBI, NextMBBI);
+
+  case SystemZ::ARMux:
+  case SystemZ::ARMuxK:
+  case SystemZ::SRMux:
+  case SystemZ::SRMuxK:
+  case SystemZ::ALRMux:
+  case SystemZ::ALRMuxK:
+  case SystemZ::SLRMux:
+  case SystemZ::SLRMuxK:
+    return expandAddSubMux(MBB, MBBI);
+
+  case SystemZ::CRMux:
+  case SystemZ::CLRMux:
+    return expandCmpMux(MBB, MBBI);
+
   default:
     break;
   }
@@ -143,6 +352,8 @@
 bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+  TRI = Subtarget->getRegisterInfo();
 
   bool Modified = false;
   for (auto &MBB : MF)
Index: lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFormats.td
+++ lib/Target/SystemZ/SystemZInstrFormats.td
@@ -4574,6 +4574,19 @@
   let OpType = "reg";
 }
 
+// Like BinaryRRAndK, but expanded after RA depending on the choice of register.
+multiclass BinaryRRAndKPseudo<string key, SDPatternOperator operator,
+        RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls3> {
+  let NumOpsKey = key, Predicates = [FeatureHighWord] in {
+    let NumOpsValue = "3" in
+      def K : Pseudo<(outs cls1:$R1), (ins cls2:$R2, cls3:$R3), []>;
+    let NumOpsValue = "2", isConvertibleToThreeAddress = 1,
+        Constraints = "$R1 = $R1src" in
+      def "" : Pseudo<(outs cls1:$R1), (ins cls2:$R1src, cls3:$R3),
+                      [(set cls1:$R1, (operator cls2:$R1src, cls3:$R3))]>;
+  }
+}
+
 // Like BinaryRI, but expanded after RA depending on the choice of register.
 class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
                      Immediate imm>
@@ -4621,6 +4634,15 @@
   let AccessBytes = bytes;
 }
 
+// Like CompareRRE, but expanded after RA depending on the choice of
+// register.
+class CompareRREPseudo<SDPatternOperator operator, RegisterOperand cls1,
+                       RegisterOperand cls2>
+  : Pseudo<(outs), (ins cls1:$R1, cls2:$R2),
+           [(set CC, (operator cls1:$R1, cls2:$R2))]> {
+  let isCompare = 1;
+}
+
 // Like TestBinarySIL, but expanded later.
 class TestBinarySILPseudo<SDPatternOperator operator, Immediate imm>
   : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
Index: lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.h
+++ lib/Target/SystemZ/SystemZInstrInfo.h
@@ -155,8 +155,6 @@
                        unsigned HighOpcode) const;
   void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
                        unsigned HighOpcode) const;
-  void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
-                        unsigned HighOpcode) const;
   void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned Size) const;
   void expandLoadStackGuard(MachineInstr *MI) const;
@@ -315,6 +313,14 @@
   bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
                                        AliasAnalysis *AA = nullptr) const override;
+
+  // Member wrapper that forwards Opcode to SystemZ::getThreeOperandOpcode()
+  // and returns its result.
+  unsigned getThreeOperandOpcode(unsigned Opcode) const;
+  // Set LowOpcode, HiOpcode and HiHiLoOpcode to the real instructions that
+  // can implement the add/subtract Mux pseudo Opc.
+  void getAddSubMuxOpcodes(unsigned &LowOpcode, unsigned &HiOpcode,
+                           unsigned &HiHiLoOpcode, unsigned Opc) const;
 };
 
 } // end namespace llvm
Index: lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -46,7 +46,7 @@
 #include "SystemZGenInstrInfo.inc"
 
 #define DEBUG_TYPE "systemz-II"
-STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
+STATISTIC(PhysRegCopies, "Number of copies between allocatable physregs");
 
 // Return a mask with Count low bits set.
 static uint64_t allOnes(unsigned int Count) {
@@ -198,29 +198,6 @@
   MI.setDesc(get(Opcode));
 }
 
-// MI is a load-register-on-condition pseudo instruction. Replace it with
-// LowOpcode if source and destination are both low GR32s and HighOpcode if
-// source and destination are both high GR32s.
-void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
-                                        unsigned HighOpcode) const {
-  unsigned DestReg = MI.getOperand(0).getReg();
-  unsigned SrcReg = MI.getOperand(2).getReg();
-  bool DestIsHigh = isHighReg(DestReg);
-  bool SrcIsHigh = isHighReg(SrcReg);
-
-  if (!DestIsHigh && !SrcIsHigh)
-    MI.setDesc(get(LowOpcode));
-  else if (DestIsHigh && SrcIsHigh)
-    MI.setDesc(get(HighOpcode));
-  else
-    LOCRMuxJumps++;
-
-  // If we were unable to implement the pseudo with a single instruction, we
-  // need to convert it back into a branch sequence. This cannot be done here
-  // since the caller of expandPostRAPseudo does not handle changes to the CFG
-  // correctly. This change is defered to the SystemZExpandPseudo pass.
-}
-
 // MI is an RR-style pseudo instruction that zero-extends the low Size bits
 // of one GRX32 into another. Replace it with LowOpcode if both operands
 // are low registers, otherwise use RISB[LH]G.
@@ -797,6 +774,7 @@
   if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
     emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc,
                   false);
+    PhysRegCopies++;
     return;
   }
 
@@ -813,6 +791,7 @@
     BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg)
       .addReg(SrcRegHi, getKillRegState(KillSrc))
       .addReg(SrcRegLo, getKillRegState(KillSrc));
+    PhysRegCopies++;
     return;
   }
   if (SystemZ::FP128BitRegClass.contains(DestReg) &&
@@ -828,6 +807,7 @@
     copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false);
     BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo)
       .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1);
+    PhysRegCopies++;
     return;
   }
 
@@ -878,6 +858,7 @@
 
   BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
     .addReg(SrcReg, getKillRegState(KillSrc));
+  PhysRegCopies++;
 }
 
 void SystemZInstrInfo::storeRegToStackSlot(
@@ -1301,10 +1282,6 @@
     expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
     return true;
 
-  case SystemZ::LOCRMux:
-    expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
-    return true;
-
   case SystemZ::STCMux:
     expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
     return true;
@@ -1385,14 +1362,6 @@
     expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false);
     return true;
 
-  case SystemZ::CHIMux:
-    expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false);
-    return true;
-
-  case SystemZ::CFIMux:
-    expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false);
-    return true;
-
   case SystemZ::CLFIMux:
     expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false);
     return true;
@@ -1815,3 +1784,34 @@
   return false;
 }
+
+// Forward to the SystemZ::getThreeOperandOpcode() opcode mapping.
+unsigned SystemZInstrInfo::getThreeOperandOpcode(unsigned Opcode) const {
+  return SystemZ::getThreeOperandOpcode(Opcode);
+}
+
+// For the add/subtract Mux pseudo Opc (with or without the K suffix), set
+// LowOpcode, HiOpcode and HiHiLoOpcode to the corresponding real opcodes.
+// Opc must be one of the Mux pseudos listed below.
+void SystemZInstrInfo::getAddSubMuxOpcodes(unsigned &LowOpcode,
+              unsigned &HiOpcode, unsigned &HiHiLoOpcode, unsigned Opc) const {
+  if (Opc == SystemZ::ARMux || Opc == SystemZ::ARMuxK) {
+    LowOpcode = SystemZ::AR;
+    HiOpcode = SystemZ::AHHHR;
+    HiHiLoOpcode = SystemZ::AHHLR;
+  } else if (Opc == SystemZ::SRMux || Opc == SystemZ::SRMuxK) {
+    LowOpcode = SystemZ::SR;
+    HiOpcode = SystemZ::SHHHR;
+    HiHiLoOpcode = SystemZ::SHHLR;
+  } else if (Opc == SystemZ::ALRMux || Opc == SystemZ::ALRMuxK) {
+    LowOpcode = SystemZ::ALR;
+    HiOpcode = SystemZ::ALHHHR;
+    HiHiLoOpcode = SystemZ::ALHHLR;
+  } else if (Opc == SystemZ::SLRMux || Opc == SystemZ::SLRMuxK) {
+    LowOpcode = SystemZ::SLR;
+    HiOpcode = SystemZ::SLHHHR;
+    HiHiLoOpcode = SystemZ::SLHHLR;
+  } else {
+    // Do not hand uninitialized opcodes back to the caller.
+    llvm_unreachable("Not an add/subtract Mux pseudo.");
+  }
+}
Index: lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.td
+++ lib/Target/SystemZ/SystemZInstrInfo.td
@@ -893,6 +893,7 @@
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
   // Addition of a register.
   let isCommutable = 1 in {
+    defm ARMux : BinaryRRAndKPseudo<"armux", z_sadd, GRX32, GRX32, GRX32>;
     defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>;
     defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_sadd, GR64, GR64>;
   }
@@ -935,6 +936,7 @@
 let Defs = [CC] in {
   // Addition of a register.
   let isCommutable = 1 in {
+    defm ALRMux : BinaryRRAndKPseudo<"alrmux", z_uadd, GRX32, GRX32, GRX32>;
     defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>;
     defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uadd, GR64, GR64>;
   }
@@ -994,6 +996,7 @@
 // Subtraction producing a signed overflow flag.
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
   // Subtraction of a register.
+  defm SRMux : BinaryRRAndKPseudo<"srmux", z_ssub, GRX32, GRX32, GRX32>;
   defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>;
   def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
   defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssub, GR64, GR64>;
@@ -1035,6 +1038,7 @@
 // Subtraction producing a carry.
 let Defs = [CC] in {
   // Subtraction of a register.
+  defm SLRMux : BinaryRRAndKPseudo<"slrmux", z_usub, GRX32, GRX32, GRX32>;
   defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>;
   def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
   defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usub, GR64, GR64>;
@@ -1420,7 +1424,10 @@
 // some of the signed forms have COMPARE AND BRANCH equivalents whereas none
 // of the unsigned forms do.
 let Defs = [CC], CCValues = 0xE in {
-  // Comparison with a register.
+  // Comparison with a register. CRMux expands to CR, CHHR or CHLR,
+  // depending on the choice of register.
+  def CRMux : CompareRREPseudo<z_scmp, GRX32, GRX32>,
+              Requires<[FeatureHighWord]>;
   def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>;
   def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
   def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>;
@@ -1472,7 +1479,10 @@
 
 // Unsigned comparisons.
 let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
-  // Comparison with a register.
+  // Comparison with a register. CLRMux expands to CLR, CLHHR or CLHLR,
+  // depending on the choice of register.
+  def CLRMux : CompareRREPseudo<z_ucmp, GRX32, GRX32>,
+               Requires<[FeatureHighWord]>;
   def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>;
   def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
   def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
Index: lib/Target/SystemZ/SystemZRegisterInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.h
+++ lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -56,6 +56,10 @@
                              const VirtRegMap *VRM,
                              const LiveRegMatrix *Matrix) const override;
 
+  bool allowHintRecoloring(unsigned VReg,
+                           unsigned NewPhysReg,
+                           const VirtRegMap *VRM) const override;
+
   // Override TargetRegisterInfo.h.
   bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -21,9 +21,21 @@
 #define GET_REGINFO_TARGET_DESC
 #include "SystemZGenRegisterInfo.inc"
 
+#define DEBUG_TYPE "regalloc"
+
 SystemZRegisterInfo::SystemZRegisterInfo()
     : SystemZGenRegisterInfo(SystemZ::R14D) {}
 
+// Return GR32 if PhysReg is a low 32-bit register, otherwise GRH32. PhysReg
+// is expected to be in one of the two classes.
+static const TargetRegisterClass *getRCFromPhysReg(unsigned PhysReg) {
+  if (SystemZ::GR32BitRegClass.contains(PhysReg))
+    return &SystemZ::GR32BitRegClass;
+  assert (SystemZ::GRH32BitRegClass.contains(PhysReg) &&
+          "Phys reg not in GR32 or GRH32?");
+  return &SystemZ::GRH32BitRegClass;
+}
+
 // Given that MO is a GRX32 operand, return either GR32 or GRH32 if MO
 // somehow belongs in it. Otherwise, return GRX32.
 static const TargetRegisterClass *getRC32(MachineOperand &MO,
@@ -40,14 +52,8 @@
       MO.getSubReg() == SystemZ::subreg_hh32)
     return &SystemZ::GRH32BitRegClass;
 
-  if (VRM && VRM->hasPhys(MO.getReg())) {
-    unsigned PhysReg = VRM->getPhys(MO.getReg());
-    if (SystemZ::GR32BitRegClass.contains(PhysReg))
-      return &SystemZ::GR32BitRegClass;
-    assert (SystemZ::GRH32BitRegClass.contains(PhysReg) &&
-            "Phys reg not in GR32 or GRH32?");
-    return &SystemZ::GRH32BitRegClass;
-  }
+  if (VRM && VRM->hasPhys(MO.getReg()))
+    return getRCFromPhysReg(VRM->getPhys(MO.getReg()));
 
   assert (RC == &SystemZ::GRX32BitRegClass);
   return RC;
@@ -73,6 +79,11 @@
       Hints.push_back(Reg);
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> HARDHIORLO("hard-hi-or-lo", cl::init(false));
+static cl::opt<bool> SOFTHIORLO("soft-hi-or-lo", cl::init(false));
+
 bool
 SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                            ArrayRef<MCPhysReg> Order,
@@ -81,59 +92,222 @@
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo *MRI = &MF.getRegInfo();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
 
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
   if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
+    // Compare/Add/Sub "High" Muxes. Not all combinations are supported by
+    // the target.
+    struct Reg2RCMap : std::map<unsigned, const TargetRegisterClass *> {
+      unsigned VirtReg_; // XXX
+      bool Change;
+      unsigned RequiredHigh; // USEFUL?
+      unsigned RequiredLow;
+      unsigned RequiredHigh_soft;
+      unsigned RequiredLow_soft;
+      std::set<MachineInstr *> CountedMIs; // No improvement...(?)
+      void reset(unsigned Reg) {
+        VirtReg_ = Reg;
+        Change = false;
+        RequiredHigh = 0;
+        RequiredLow = 0;
+        RequiredHigh_soft = 0;
+        RequiredLow_soft = 0;
+        CountedMIs.clear();
+      }
+      void findRC(MachineOperand &MO,
+                  const VirtRegMap *VRM,
+                  const MachineRegisterInfo *MRI) {
+        if (find(MO.getReg()) != end())
+          return;
+        const TargetRegisterClass *RC = getRC32(MO, VRM, MRI);
+        (*this)[MO.getReg()] = RC;
+      }
+      void constrainReg(unsigned Reg, const TargetRegisterClass *RC,
+                        MachineInstr *MI, bool HardHint = true) {
+        if ((*this)[Reg] != &SystemZ::GRX32BitRegClass)
+          return; // XXX What if two different?
+        if (Reg == VirtReg_) {
+          if (!CountedMIs.count(MI)) {
+            if (HardHint) {
+              if (RC == &SystemZ::GR32BitRegClass)
+                RequiredLow++;
+              else
+                RequiredHigh++;
+            } else {
+              if (RC == &SystemZ::GR32BitRegClass)
+                RequiredLow_soft++;
+              else
+                RequiredHigh_soft++;
+            }
+            CountedMIs.insert(MI);
+          }
+        } else if (HardHint) {
+          (*this)[Reg] = RC;
+          Change = true;
+        }
+      }
+    } Reg2RC;
+    // Search all use/def connected instructions iteratively to propagate the
+    // requirements of registers being in either GR32 or GRH32. At the end,
+    // pass hints for VirtReg if either GR32 or GRH32 seems preferred.
+    // XXX: Make sure not to revisit same instruction multiple times?
     SmallVector<unsigned, 8> Worklist;
     SmallSet<unsigned, 4> DoneRegs;
-    Worklist.push_back(VirtReg);
-    while (Worklist.size()) {
-      unsigned Reg = Worklist.pop_back_val();
-      if (!DoneRegs.insert(Reg).second)
-        continue;
-
-      for (auto &Use : MRI->use_instructions(Reg)) {
-        // For LOCRMux, see if the other operand is already a high or low
-        // register, and in that case give the correpsonding hints for
-        // VirtReg. LOCR instructions need both operands in either high or
-        // low parts.
-        if (Use.getOpcode() == SystemZ::LOCRMux) {
-          MachineOperand &TrueMO = Use.getOperand(1);
-          MachineOperand &FalseMO = Use.getOperand(2);
-          const TargetRegisterClass *RC =
-            TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
-                                   getRC32(TrueMO, VRM, MRI));
-          if (RC && RC != &SystemZ::GRX32BitRegClass) {
-            addHints(Order, Hints, RC, MRI);
-            // Return true to make these hints the only regs available to
-            // RA. This may mean extra spilling but since the alternative is
-            // a jump sequence expansion of the LOCRMux, it is preferred.
-            return true;
+    Reg2RC.Change = true;
+    while (Reg2RC.Change) {
+      Reg2RC.reset(VirtReg);
+      Worklist.clear();
+      Worklist.push_back(VirtReg);
+      DoneRegs.clear();
+      while (Worklist.size()) {
+        unsigned Reg = Worklist.pop_back_val();
+        if (!DoneRegs.insert(Reg).second)
+          continue;
+        for (MachineInstr &MI : MRI->reg_nodbg_instructions(Reg)) {
+          const TargetRegisterClass *LOW = &SystemZ::GR32BitRegClass;
+          const TargetRegisterClass *HIGH = &SystemZ::GRH32BitRegClass;
+          unsigned Regs[3] = {0, 0, 0};
+          unsigned NumHi = 0;
+          unsigned NumLo = 0;
+          unsigned OpI = 0;
+          auto processOperands = [&]() -> void {
+            for (; OpI < MI.getNumOperands(); ++OpI) {
+              MachineOperand &MO = MI.getOperand(OpI);
+              if (!MO.isReg() || MO.isImplicit())
+                break;
+              assert(OpI < 3 && "Index out of range.");
+              Regs[OpI] = MO.getReg();
+              Reg2RC.findRC(MO, VRM, MRI);
+              // Add GRX32 registers to worklist
+              if (Reg2RC[MO.getReg()] == &SystemZ::GRX32BitRegClass)
+                Worklist.push_back(MO.getReg());
+              else if (Reg2RC[MO.getReg()] == HIGH)
+                NumHi++;
+              else
+                NumLo++;
+            }
+          };
+          auto addHiOrLoHints = [&](bool OnlyExtra) -> void {
+            if (!HARDHIORLO && !SOFTHIORLO && OnlyExtra)
+              return;
+            const TargetRegisterClass *RC = nullptr;
+            if (NumHi > 0 && NumLo == 0)
+              RC = HIGH;
+            else if (NumLo > 0 && NumHi == 0)
+              RC = LOW;
+            bool HintHard = (OnlyExtra ? HARDHIORLO : true);
+            if (RC != nullptr)
+              // Test the index before reading Regs[] to stay within bounds.
+              for (unsigned Idx = 0; Idx < 3 && Regs[Idx]; ++Idx)
+                Reg2RC.constrainReg(Regs[Idx], RC, &MI, HintHard);
+          };
+
+          switch (MI.getOpcode()) {
+          case SystemZ::CRMux:
+          case SystemZ::CLRMux:
+            processOperands();
+            // L_ -> LL
+            if (Reg2RC[Regs[0]] == LOW)
+              Reg2RC.constrainReg(Regs[1], LOW, &MI);
+            // _H -> HH
+            else if (Reg2RC[Regs[1]] == HIGH)
+              Reg2RC.constrainReg(Regs[0], HIGH, &MI);
+            addHiOrLoHints(true/*OnlyExtra*/);
+            break;
+
+          case SystemZ::SRMux:
+          case SystemZ::SRMuxK:
+          case SystemZ::SLRMux:
+          case SystemZ::SLRMuxK:
+            processOperands();
+            // H__ -> HH_
+            if (Reg2RC[Regs[0]] == HIGH) {
+              Reg2RC.constrainReg(Regs[1], HIGH, &MI);
+              break;
+            }
+            // _L_ -> LLL
+            if (Reg2RC[Regs[1]] == LOW) {
+              Reg2RC.constrainReg(Regs[0], LOW, &MI);
+              Reg2RC.constrainReg(Regs[2], LOW, &MI);
+              break;
+            }
+            LLVM_FALLTHROUGH;
+          case SystemZ::ARMux:
+          case SystemZ::ARMuxK:
+          case SystemZ::ALRMux:
+          case SystemZ::ALRMuxK:
+            processOperands();
+            // L__ -> LLL
+            if (Reg2RC[Regs[0]] == LOW) {
+              Reg2RC.constrainReg(Regs[1], LOW, &MI);
+              Reg2RC.constrainReg(Regs[2], LOW, &MI);
+            }
+            // _LL -> LLL
+            else if (Reg2RC[Regs[1]] == LOW && Reg2RC[Regs[2]] == LOW)
+              Reg2RC.constrainReg(Regs[0], LOW, &MI);
+            else if (Reg2RC[Regs[0]] == HIGH) {
+              // HL_ -> HLH
+              if (Reg2RC[Regs[1]] == LOW)
+                Reg2RC.constrainReg(Regs[2], HIGH, &MI);
+              // H_L -> HHL
+              else if (Reg2RC[Regs[2]] == LOW)
+                Reg2RC.constrainReg(Regs[1], HIGH, &MI);
+              // Hrr -> HHH
+              else if (Regs[1] == Regs[2])
+                Reg2RC.constrainReg(Regs[1], HIGH, &MI);
+            }
+            // _H? -> HH?
+            else if (Reg2RC[Regs[1]] == HIGH || Reg2RC[Regs[2]] == HIGH)
+              Reg2RC.constrainReg(Regs[0], HIGH, &MI);
+            addHiOrLoHints(true/*OnlyExtra*/);
+            break;
+
+          case SystemZ::LOCRMux: {
+            processOperands();
+            addHiOrLoHints(false/*OnlyExtra*/);
+            break;
           }
-          // Add the other operand of the LOCRMux to the worklist.
-          unsigned OtherReg =
-            (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
-          if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass)
-            Worklist.push_back(OtherReg);
-        } // end LOCRMux
-        else if (Use.getOpcode() == SystemZ::CHIMux ||
-                 Use.getOpcode() == SystemZ::CFIMux) {
-          if (Use.getOperand(1).getImm() == 0) {
-            bool OnlyLMuxes = true;
-            for (MachineInstr &DefMI : MRI->def_instructions(VirtReg))
-              if (DefMI.getOpcode() != SystemZ::LMux)
-                OnlyLMuxes = false;
-            if (OnlyLMuxes) {
-              addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
-              // Return false to make these hints preferred but not obligatory.
-              return false;
+          case SystemZ::CHIMux:
+          case SystemZ::CFIMux:
+            processOperands();
+            if (MI.getOperand(1).getImm() == 0) {
+              bool OnlyLMuxes = true;
+              for (MachineInstr &DefMI : MRI->def_instructions(Reg))
+                if (DefMI.getOpcode() != SystemZ::LMux)
+                  OnlyLMuxes = false;
+              if (OnlyLMuxes)
+                Reg2RC.constrainReg(Regs[0], LOW, &MI);
             }
+            break;
+
+          case SystemZ::COPY:
+            if (MI.getOperand(1).getReg() == Reg)
+              Worklist.push_back(MI.getOperand(0).getReg()); // USEFUL?
+            break;
           }
-        } // end CHIMux / CFIMux
+        } // Reg
+      } // Worklist
+    } // Change
+    if (Reg2RC.RequiredHigh > Reg2RC.RequiredLow) {
+      addHints(Order, Hints, &SystemZ::GRH32BitRegClass, MRI);
+      LLVM_DEBUG(dbgs() << "SystemZ: Mux hints\n");
+      return true;
+    } else if (Reg2RC.RequiredLow > Reg2RC.RequiredHigh) {
+      addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
+      LLVM_DEBUG(dbgs() << "SystemZ: Mux hints\n");
+      return true;
+    } else if (!Reg2RC.RequiredLow) {
+      if (Reg2RC.RequiredHigh_soft > Reg2RC.RequiredLow_soft) {
+        addHints(Order, Hints, &SystemZ::GRH32BitRegClass, MRI);
+        LLVM_DEBUG(dbgs() << "SystemZ: Mux hints\n");
+        return false;
+      } else if (Reg2RC.RequiredLow_soft > Reg2RC.RequiredHigh_soft) {
+        addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
+        LLVM_DEBUG(dbgs() << "SystemZ: Mux hints\n");
+        return false;
       }
     }
   }
@@ -141,6 +315,44 @@
   return BaseImplRetVal;
 }
 
+// Veto hint recoloring that would move VReg between the low (GR32) and high
+// (GRH32) halves when VReg is used or defined by a compare/add/sub Mux pseudo.
+// All other recolorings are allowed.
+bool SystemZRegisterInfo::allowHintRecoloring(unsigned VReg,
+                                              unsigned NewPhysReg,
+                                              const VirtRegMap *VRM) const {
+  unsigned CurrPhys = VRM->getPhys(VReg);
+  if (!SystemZ::GRX32BitRegClass.contains(NewPhysReg))
+    return true;
+  assert(SystemZ::GRX32BitRegClass.contains(CurrPhys) && "Expected GRX32 reg.");
+  if (SystemZ::GR32BitRegClass.contains(CurrPhys) ==
+      SystemZ::GR32BitRegClass.contains(NewPhysReg))
+    return true;
+
+  // This is a GRX32 register that has changed between low and high parts.
+  MachineRegisterInfo *MRI = &VRM->getRegInfo();
+  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(VReg))
+    switch (MI.getOpcode()) {
+    case SystemZ::CRMux:
+    case SystemZ::CLRMux:
+    case SystemZ::SRMuxK:
+    case SystemZ::SLRMuxK:
+    case SystemZ::SRMux:
+    case SystemZ::SLRMux:
+    case SystemZ::ARMux:
+    case SystemZ::ARMuxK:
+    case SystemZ::ALRMux:
+    case SystemZ::ALRMuxK:
+      LLVM_DEBUG(dbgs() << "SystemZ: Avoiding replacing "
+                 << getName(CurrPhys) << " with "
+                 << getName(NewPhysReg) << " in "; MI.dump());
+      return false;
+    default: break;
+    }
+
+  return true;
+}
+
 const MCPhysReg *
 SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
Index: lib/Target/SystemZ/SystemZScheduleZ13.td
===================================================================
--- lib/Target/SystemZ/SystemZScheduleZ13.td
+++ lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -366,6 +366,7 @@
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
 def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
 
@@ -397,6 +398,7 @@
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>;
 
 // Subtraction with borrow
 def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
@@ -547,6 +549,7 @@
 def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
 def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
 def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+def : InstRW<[WLat1, FXb, NormalGr],
(instregex "C(L)?RMux$")>;
 
 // Compare halfword
 def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>;
Index: lib/Target/SystemZ/SystemZScheduleZ14.td
===================================================================
--- lib/Target/SystemZ/SystemZScheduleZ14.td
+++ lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -367,6 +367,7 @@
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
 def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
 
@@ -398,6 +399,7 @@
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>;
 
 // Subtraction with borrow
 def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
@@ -557,6 +559,7 @@
 def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
 def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
 def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux$")>;
 
 // Compare halfword
 def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>;
Index: lib/Target/SystemZ/SystemZScheduleZ196.td
===================================================================
--- lib/Target/SystemZ/SystemZScheduleZ196.td
+++ lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -326,6 +326,7 @@
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>;
 def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr],
              (instregex "A(L)?G$")>;
@@ -359,6 +360,7 @@
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "S(L)?HHLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>;
 
 // Subtraction with borrow
 def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone],
@@ -508,6 +510,7 @@
 def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>;
 def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "C(L)?HLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux$")>;
 
 // Compare halfword
 def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone],
Index: lib/Target/SystemZ/SystemZScheduleZEC12.td
===================================================================
--- lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -337,6 +337,7 @@
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>;
 def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr],
              (instregex "A(L)?G$")>;
@@ -370,6 +371,7 @@
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "S(L)?HHLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>;
 
 // Subtraction with borrow
 def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone],
@@ -519,6 +521,7 @@
 def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>;
 def : InstRW<[WLat2, FXU, NormalGr], (instregex "C(L)?HLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux$")>;
 
 // Compare halfword
 def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CH(Y)?$")>;
Index: lib/Target/SystemZ/SystemZSelectMux.cpp
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZSelectMux.cpp
@@ -0,0 +1,246 @@
+//==------- SystemZSelectMux.cpp - Select Mux instructions --------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that is run immediately after VirtRegRewriter
+// and before MachineCopyPropagation. The purpose is to set the opcodes of
+// GRX32 Mux pseudos to match the choice of registers which have been
+// carefully hinted during register allocation. It is important to do this
+// before any later pass might substitute a register for another without
+// knowledge of the actual legal combinations of high/low register
+// assignments.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_SELECT_MUX_NAME "SystemZ Mux pseudo selection pass"
+
+#define DEBUG_TYPE "systemz-selectmux"
+STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
+STATISTIC(LOCRs, "Number of LOCRs");
+STATISTIC(MUXLOWs, "Number of Muxes using only low parts");
+STATISTIC(MUXHIGHs, "Number of Muxes using only high parts");
+STATISTIC(MUXHILOs, "Number of Muxes using high and low parts");
+STATISTIC(BADMUXs, "Number of Muxes ending up in illegal registers");
+
+namespace llvm {
+  void initializeSystemZSelectMuxPass(PassRegistry&);
+}
+
+namespace {
+class SystemZSelectMux : public MachineFunctionPass {
+public:
+  static char ID;
+  SystemZSelectMux() : MachineFunctionPass(ID) {
+    initializeSystemZSelectMuxPass(*PassRegistry::getPassRegistry());
+  }
+
+  const SystemZInstrInfo *TII = nullptr; // Set in runOnMachineFunction().
+  SlotIndexes *SI = nullptr;             // Set in runOnMachineFunction().
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return SYSTEMZ_SELECT_MUX_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequiredTransitive<SlotIndexes>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveStacks>(); // NOTE(review): inferred from includes.
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  bool selectMBB(MachineBasicBlock &MBB);
+  bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+};
+char SystemZSelectMux::ID = 0;
+}
+
+INITIALIZE_PASS(SystemZSelectMux, "systemz-select-mux",
+                SYSTEMZ_SELECT_MUX_NAME, false, false)
+
+/// Returns an instance of the Select Mux pass.
+FunctionPass *llvm::createSystemZSelectMuxPass(SystemZTargetMachine &TM) {
+  return new SystemZSelectMux();
+}
+
+/// If MBBI references a pseudo instruction that should be selected here,
+/// do it and return true. Otherwise return false.
+bool SystemZSelectMux::selectMI(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Regs[3] = {}; // Zero-filled: the loop below may stop early.
+  bool IsHigh[3] = {};   // True if the operand is in GRH32.
+  bool IsKill[3] = {};   // Kill flag of the operand, kept when rebuilding.
+  for (unsigned OpI = 0; OpI < MI.getNumOperands() && OpI < 3; ++OpI) {
+    MachineOperand &MO = MI.getOperand(OpI);
+    if (!MO.isReg() || MO.isImplicit())
+      break;
+    Regs[OpI] = MO.getReg();
+    IsHigh[OpI] = SystemZ::GRH32BitRegClass.contains(MO.getReg());
+    IsKill[OpI] = MO.isKill();
+  }
+
+  unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
+  case SystemZ::LOCRMux: {
+    if (!IsHigh[0] && !IsHigh[2])
+      MI.setDesc(TII->get(SystemZ::LOCR));
+    else if (IsHigh[0] && IsHigh[2])
+      MI.setDesc(TII->get(SystemZ::LOCFHR));
+    else
+      // If we were unable to implement the pseudo with a single instruction,
+      // we need to convert it back into a branch sequence. This cannot be
+      // done here since the caller of expandPostRAPseudo does not handle
+      // changes to the CFG correctly. This change is deferred to the
+      // SystemZExpandPseudo pass.
+      LOCRMuxJumps++;
+    if (MI.getOpcode() != SystemZ::LOCRMux)
+      LOCRs++;
+    return true;
+  }
+
+  case SystemZ::CHIMux:
+    MI.setDesc(TII->get(IsHigh[0] ? SystemZ::CIH :SystemZ::CHI));
+    return true;
+
+  case SystemZ::CFIMux:
+    MI.setDesc(TII->get(IsHigh[0] ? SystemZ::CIH :SystemZ::CFI));
+    return true;
+
+  case SystemZ::CRMux:
+  case SystemZ::CLRMux: {
+    // MI is a 32 bit compare with register pseudo instruction. Replace it
+    // with one of the three possible opcodes. "LowHi" is not supported and
+    // must therefore be swapped (in SystemZExpandPseudo.cpp).
+    unsigned LowLowOpcode, HiHiOpcode, HiLowOpcode;
+    if (Opcode == SystemZ::CRMux) {
+      LowLowOpcode = SystemZ::CR;
+      HiHiOpcode = SystemZ::CHHR;
+      HiLowOpcode = SystemZ::CHLR;
+    } else {
+      LowLowOpcode = SystemZ::CLR;
+      HiHiOpcode = SystemZ::CLHHR;
+      HiLowOpcode = SystemZ::CLHLR;
+    }
+    if (!IsHigh[0] && !IsHigh[1]) {
+      MI.setDesc(TII->get(LowLowOpcode));
+      MUXLOWs++;
+    }
+    else if (IsHigh[0] && IsHigh[1]) {
+      MI.setDesc(TII->get(HiHiOpcode));
+      MUXHIGHs++;
+    }
+    else if (IsHigh[0] && !IsHigh[1]) {
+      MI.setDesc(TII->get(HiLowOpcode));
+      MUXHILOs++;
+    }
+    else // LowHi
+      BADMUXs++;
+
+    return true;
+  }
+
+  case SystemZ::ARMux:
+  case SystemZ::ARMuxK:
+  case SystemZ::SRMux:
+  case SystemZ::SRMuxK:
+  case SystemZ::ALRMux:
+  case SystemZ::ALRMuxK:
+  case SystemZ::SLRMux:
+  case SystemZ::SLRMuxK: {
+    unsigned LowOpcode, HiOpcode, HiHiLoOpcode;
+    TII->getAddSubMuxOpcodes(LowOpcode, HiOpcode, HiHiLoOpcode, Opcode);
+    bool IsAdd = MI.isCommutable();
+    bool Commute = false;
+    if (!IsHigh[0] && !IsHigh[1] && !IsHigh[2]) {
+      Opcode = (Regs[0] == Regs[1] ?
+                LowOpcode : TII->getThreeOperandOpcode(LowOpcode));
+      MUXLOWs++;
+    }
+    else if (IsHigh[0] && IsHigh[1] && IsHigh[2]) {
+      Opcode = HiOpcode;
+      MUXHIGHs++;
+    }
+    else if (IsHigh[0] && (IsHigh[1] || IsHigh[2])) {
+      if (!IsHigh[1] && IsHigh[2]) { // HLH is unsupported.
+        if (IsAdd) {
+          Commute = true;
+          Opcode = HiHiLoOpcode;
+        }
+      } else
+        Opcode = HiHiLoOpcode;
+      if (Opcode == HiHiLoOpcode)
+        MUXHILOs++;
+      else
+        BADMUXs++;
+    }
+    else
+      BADMUXs++;
+
+    if (Opcode == MI.getOpcode())
+      break;
+
+    MachineBasicBlock *ParentMBB = MI.getParent(); // Avoids shadowing MBB.
+    MachineInstr *NewMI;
+    if (!Commute)
+      NewMI = BuildMI(*ParentMBB, MI, MI.getDebugLoc(), TII->get(Opcode), Regs[0])
+        .addReg(Regs[1], getKillRegState(IsKill[1]))
+        .addReg(Regs[2], getKillRegState(IsKill[2]));
+    else
+      NewMI = BuildMI(*ParentMBB, MI, MI.getDebugLoc(), TII->get(Opcode), Regs[0])
+        .addReg(Regs[2], getKillRegState(IsKill[2]))
+        .addReg(Regs[1], getKillRegState(IsKill[1]));
+    SI->replaceMachineInstrInMaps(MI, *NewMI);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  default:
+    break;
+  }
+
+  return false;
+}
+
+/// Iterate over the instructions in basic block MBB and select any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZSelectMux::selectMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= selectMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool SystemZSelectMux::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  SI = &getAnalysis<SlotIndexes>();
+
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= selectMBB(MBB);
+  return Modified;
+}
+
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -182,6 +182,7 @@
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
+  void addPostRewrite() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
 };
@@ -211,6 +212,10 @@
   return true;
 }
 
+void SystemZPassConfig::addPostRewrite() {
+  addPass(createSystemZSelectMuxPass(getSystemZTargetMachine()));
+}
+
 void SystemZPassConfig::addPreSched2() {
   addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
 
Index: test/CodeGen/SystemZ/debuginstr-00.mir
===================================================================
--- test/CodeGen/SystemZ/debuginstr-00.mir
+++ test/CodeGen/SystemZ/debuginstr-00.mir
@@ -63,7 +63,7 @@
   bb.1.bb2:
     liveins: $r0l
 
-    CHIMux renamable $r0l, 0, implicit-def $cc
+    CHI renamable $r0l, 0, implicit-def $cc
     BRC 14, 6, %bb.1, implicit killed $cc
 
   bb.2.bb3.preheader: