Index: include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- include/llvm/CodeGen/TargetPassConfig.h +++ include/llvm/CodeGen/TargetPassConfig.h @@ -378,6 +378,11 @@ return false; } + /// Add passes to be run immediately after virtual registers are rewritten + /// to physical registers. These passes may replace an MI with a new one, + /// but should preserve SlotIndexes while doing so. + virtual void addPostRewrite() { } + /// This method may be implemented by targets that want to run passes after /// register allocation pass pipeline but before prolog-epilog insertion. virtual void addPostRegAlloc() { } Index: include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- include/llvm/CodeGen/TargetRegisterInfo.h +++ include/llvm/CodeGen/TargetRegisterInfo.h @@ -813,6 +813,13 @@ const LiveRegMatrix *Matrix = nullptr) const; + /// Target hook: return false to reject hint-recoloring VReg to NewPhysReg. + virtual bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap *VRM) const { + return true; + } + /// A callback to allow target a chance to update register allocation hints /// when a register is "changed" (e.g. coalesced) to another register. /// e.g. On ARM, some virtual registers should target register pairs, Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -2966,6 +2966,13 @@ LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); continue; } + + // Check with target for any special replacements to avoid. + if (!TRI->allowHintRecoloring(Reg, PhysReg, VRM)) { + LLVM_DEBUG(dbgs() << "=> Not suitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is // equal, we consider this is profitable because it may expose // more recoloring opportunities. 
Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -1154,6 +1154,10 @@ // Finally rewrite virtual registers. addPass(&VirtRegRewriterID); + // Allow targets to expand pseudo instructions depending on the choice of + // registers. + addPostRewrite(); + // Perform stack slot coloring and post-ra machine LICM. // // FIXME: Re-enable coloring with register when it's capable of adding Index: lib/Target/SystemZ/CMakeLists.txt =================================================================== --- lib/Target/SystemZ/CMakeLists.txt +++ lib/Target/SystemZ/CMakeLists.txt @@ -30,6 +30,7 @@ SystemZMCInstLower.cpp SystemZRegisterInfo.cpp SystemZSelectionDAGInfo.cpp + SystemZSelectMux.cpp SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp Index: lib/Target/SystemZ/SystemZ.h =================================================================== --- lib/Target/SystemZ/SystemZ.h +++ lib/Target/SystemZ/SystemZ.h @@ -194,6 +194,7 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZSelectMuxPass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm Index: lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- lib/Target/SystemZ/SystemZElimCompare.cpp +++ lib/Target/SystemZ/SystemZElimCompare.cpp @@ -35,6 +35,7 @@ #define DEBUG_TYPE "systemz-elim-compare" STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); +STATISTIC(LoadAndTests, "Number of load-and-test instructions"); STATISTIC(LoadAndTraps, "Number of load-and-trap instructions"); STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); STATISTIC(FusedComparisons, "Number of fused 
compare-and-branch instructions"); @@ -302,6 +303,7 @@ MIB.setMemRefs(MI.memoperands()); MI.eraseFromParent(); + LoadAndTests++; return true; } Index: lib/Target/SystemZ/SystemZExpandPseudo.cpp =================================================================== --- lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -16,6 +16,7 @@ #include "SystemZ.h" #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,6 +24,13 @@ #define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" +#define DEBUG_TYPE "systemz-expandpseudo" +STATISTIC(MuxRotate2, "Number of Mux pseudos needing two rotates of a reg."); +STATISTIC(MuxCmpSwap, "Number of Mux pseudo compares swapped."); +STATISTIC(MuxCmpNonSwappable, + "Number of Mux pseudo compares not swappable."); +STATISTIC(MuxCmpCCLiveOut, "Number of Mux pseudo compares with live out CC."); + namespace llvm { void initializeSystemZExpandPseudoPass(PassRegistry&); } @@ -36,17 +44,22 @@ } const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; + const SystemZSubtarget *Subtarget; bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } private: + bool checkCCLiveIn(MachineBasicBlock &MBB); bool expandMBB(MachineBasicBlock &MBB); bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandAddSubMux(MachineBasicBlock::iterator MBBI); + bool expandCmpMux(MachineBasicBlock::iterator MBBI); }; char SystemZExpandPseudo::ID = 0; } @@ -111,6 +124,147 @@ return true; } +bool SystemZExpandPseudo::expandAddSubMux(MachineBasicBlock::iterator MBBI) { + 
SystemZII::MuxInfo MuxI(*MBBI, TRI); + + // 1. See if legal already (after a free commute if needed). + if (MuxI.isAdd(TII) && MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.commute(TII); + if (MuxI.selectAddSubMux(TII)) + return true; + // 2. Use destination register if not used by any source operand and if it + // would make operands legal. + if (MuxI.differentRegs(0, 1) && MuxI.differentRegs(0, 2)) { + if (MuxI.isLOW(0)) { + if (MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.useDstRegForOp(2, TII); // LLH -> LLL + else if (MuxI.isHIGH(1) && (MuxI.isLOW(2) || MuxI.sameReg(1, 2))) + MuxI.useDstRegForOp(1, TII); // LHL/LH0H0 -> LLL + } + else if (MuxI.isLOW(1)) + MuxI.useDstRegForOp(1, TII); // HLX -> HHX + } + // 3. Handle the remaining cases + if (!MuxI.DstRegUsed) { + if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 1) && "Case with free dst reg already handled."); + MuxI.RotateReg0(TRI, TII); // LLH -> HHH + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isLOW(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + if (MuxI.sameReg64(0, 1)) + MuxI.commute(TII); // LHL -> LLH + MuxI.RotateReg0(TRI, TII); // LHL/LLH -> HHX + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2)) { + MuxI.RotateReg0(TRI, TII); // LHH -> HHH/HLH/HHL/HLL + if (MuxI.sameReg64(0, 1)) + MuxI.useDstRegForOp(1, TII); // HLX -> HHH + } + else if (MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + assert(!MuxI.isAdd(TII) && "Add was handled earlier."); + if (MuxI.differentRegs64(0, 1)) + MuxI.RotateReg0(TRI, TII); // HLH -> LLL + else + MuxI.commute(TII); // HLH -> HHL + } + } + assert((MuxI.DstRegUsed || MuxI.Rotated || MuxI.Commuted) && + "Should have tried to handle all cases by now!"); + + bool Success = MuxI.selectAddSubMux(TII); + assert(Success && "Expected a legal register combination by now."); + if (MuxI.Rotated) { + 
MuxI.RotateReg0(TRI, TII); + MuxRotate2++; + } + + return true; +} + +// Return true if CC is live out of MBB. +// XXX Weird that MachineCopyPropagation seems to *not* trust live-in lists..! +static bool isCCLiveOut(MachineBasicBlock &MBB) { + for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + +bool SystemZExpandPseudo::expandCmpMux(MachineBasicBlock::iterator MBBI) { + SystemZII::MuxInfo MuxI(*MBBI, TRI); + MachineBasicBlock *MBB = MBBI->getParent(); + if (MuxI.selectCmpMux(TII)) + return true; + assert(MuxI.isLOW(0) && MuxI.isHIGH(1) && "Only Cmp Lo/Hi is illegal"); + + if (!isCCLiveOut(*MBB)) { + bool Swappable = true; + SmallVector CCUsers; + for (MachineBasicBlock::iterator Itr = std::next(MBBI); + Itr != MBB->end(); ++Itr) { + if (Itr->readsRegister(SystemZ::CC)) { + unsigned Flags = Itr->getDesc().TSFlags; + if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) + CCUsers.push_back(&*Itr); + else { + Swappable = false; + MuxCmpNonSwappable++; + break; + } + } + if (Itr->definesRegister(SystemZ::CC)) + break; + } + if (Swappable) { + assert(CCUsers.size() && "No CC users found?"); + // Update all CC users. + for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? 
+ 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); + switch(CCMaskMO.getImm()) { + case SystemZ::CCMASK_CMP_LT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GT); + break; + case SystemZ::CCMASK_CMP_GT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LT); + break; + case SystemZ::CCMASK_CMP_LE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GE); + break; + case SystemZ::CCMASK_CMP_GE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LE); + break; + case SystemZ::CCMASK_CMP_EQ: + case SystemZ::CCMASK_CMP_NE: + break; + default: + llvm_unreachable("Unexpected CCMask value."); + break; + } + } + // Swap the compare operands. + MuxI.commute(TII); + bool Success = MuxI.selectCmpMux(TII); + assert(Success && "Expected a legal register combination by now."); + MuxCmpSwap++; + return true; + } + } + else + MuxCmpCCLiveOut++; + + MuxI.RotateReg0(TRI, TII); + bool Success = MuxI.selectCmpMux(TII); + assert(Success && "Expected a legal register combination by now."); + MuxI.RotateReg0(TRI, TII); + MuxRotate2++; + return true; +} + /// If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false. bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, @@ -120,6 +274,21 @@ switch (MI.getOpcode()) { case SystemZ::LOCRMux: return expandLOCRMux(MBB, MBBI, NextMBBI); + + case SystemZ::CRMux: + case SystemZ::CLRMux: + return expandCmpMux(MBBI); // XXX is it worth waiting with this? + + case SystemZ::ARMux: // XXX is it worth waiting with this? 
+ case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + case SystemZ::SRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + return expandAddSubMux(MBBI); + default: break; } @@ -141,8 +310,40 @@ return Modified; } +bool SystemZExpandPseudo::checkCCLiveIn(MachineBasicBlock &MBB) { + if (MBB.isLiveIn(SystemZ::CC)) + return false; + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + for (; MBBI != E; ++MBBI) { + if (MBBI->isDebugInstr()) + continue; + if (MBBI->readsRegister(SystemZ::CC)) { + MBB.addLiveIn(SystemZ::CC); + return true; + } + if (MBBI->definesRegister(SystemZ::CC)) + break; + } + if (MBBI == E && isCCLiveOut(MBB)) { + MBB.addLiveIn(SystemZ::CC); + return true; + } + return false; +} + bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getSubtarget().getInstrInfo()); + Subtarget = &MF.getSubtarget(); + TRI = Subtarget->getRegisterInfo(); + + // We must know about all CC users, so better make sure a live-in CC is in + // the live-in list. + bool Change = true; + while (Change) { + Change = false; + for (auto &MBB : MF) + Change |= checkCCLiveIn(MBB); + } bool Modified = false; for (auto &MBB : MF) Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -4574,6 +4574,19 @@ let OpType = "reg"; } +// Like BinaryRRAndK, but expanded after RA depending on the choice of register. 
+multiclass BinaryRRAndKPseudo { + let NumOpsKey = key, Predicates = [FeatureHighWord] in { + let NumOpsValue = "3" in + def K : Pseudo<(outs cls1:$R1), (ins cls2:$R2, cls3:$R3), []>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1, + Constraints = "$R1 = $R1src" in + def "" : Pseudo<(outs cls1:$R1), (ins cls2:$R1src, cls3:$R3), + [(set cls1:$R1, (operator cls2:$R1src, cls3:$R3))]>; + } +} + // Like BinaryRI, but expanded after RA depending on the choice of register. class BinaryRIPseudo @@ -4621,6 +4634,15 @@ let AccessBytes = bytes; } +// Like CompareRRE, but expanded after RA depending on the choice of +// register. +class CompareRREPseudo + : Pseudo<(outs), (ins cls1:$R1, cls2:$R2), + [(set CC, (operator cls1:$R1, cls2:$R2))]> { + let isCompare = 1; +} + // Like TestBinarySIL, but expanded later. class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -315,8 +315,51 @@ bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; + + unsigned getThreeOperandOpcode(unsigned Opcode) const; }; +namespace SystemZII { +struct MuxInfo { + MachineInstr *MI; + unsigned PseudoOpcode; + unsigned NumOps; + MachineBasicBlock *MBB; + DebugLoc DL; + unsigned Regs[3]; + unsigned Regs64[3]; + bool IsKill[3]; // XXX: Kill flags useful to maintain? 
+ bool Rotated; + bool Commuted; + bool DstRegUsed; + MuxInfo(MachineInstr &mi_, const TargetRegisterInfo *TRI); + bool isHIGH(unsigned Idx) { + assert(Idx < NumOps && "Operand index out of range."); + return SystemZ::GRH32BitRegClass.contains(Regs[Idx]); + } + bool isLOW(unsigned Idx) { return !isHIGH(Idx); } + bool sameReg(unsigned A, unsigned B) { return Regs[A] == Regs[B]; } + bool differentRegs(unsigned A, unsigned B) { return !sameReg(A, B); } + bool sameReg64(unsigned A, unsigned B) { return Regs64[A] == Regs64[B]; } + bool differentRegs64(unsigned A, unsigned B) { return !sameReg64(A, B); } + void useDstRegForOp(unsigned Idx, const SystemZInstrInfo *TII); + void RotateReg0(const TargetRegisterInfo *TRI, + const SystemZInstrInfo *TII); + void negateRHSIfSub(const SystemZInstrInfo *TII); + void commute(const SystemZInstrInfo *TII); + bool selectAddSubMux(const SystemZInstrInfo *TII, + SlotIndexes *SI = nullptr); + bool selectCmpMux(const SystemZInstrInfo *TII); + void getAddSubMuxOpcodes(unsigned &LowOpcode, unsigned &HiOpcode, + unsigned &HiHiLoOpcode, unsigned Opc) const; + void getCmpMuxOpcodes(unsigned &LowOpcode, unsigned &HiOpcode, + unsigned &HiLowOpcode, unsigned Opc) const; + bool isAdd(const SystemZInstrInfo *TII) const { + return TII->get(PseudoOpcode).isCommutable(); + } +}; +} // end namespace SystemZII + } // end namespace llvm #endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -46,7 +46,8 @@ #include "SystemZGenInstrInfo.inc" #define DEBUG_TYPE "systemz-II" -STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); +STATISTIC(PREGCOPIES, "Number of copies between allocatable physregs"); +STATISTIC(MuxCopyDst, "Number of Mux pseudos needing a copy to dst."); // Return a mask with Count low bits set. 
static uint64_t allOnes(unsigned int Count) { @@ -212,13 +213,6 @@ MI.setDesc(get(LowOpcode)); else if (DestIsHigh && SrcIsHigh) MI.setDesc(get(HighOpcode)); - else - LOCRMuxJumps++; - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. } // MI is an RR-style pseudo instruction that zero-extends the low Size bits @@ -797,6 +791,7 @@ if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc, false); + PREGCOPIES++; return; } @@ -813,6 +808,7 @@ BuildMI(MBB, MBBI, DL, get(SystemZ::VMRHG), DestReg) .addReg(SrcRegHi, getKillRegState(KillSrc)) .addReg(SrcRegLo, getKillRegState(KillSrc)); + PREGCOPIES++; return; } if (SystemZ::FP128BitRegClass.contains(DestReg) && @@ -828,6 +824,7 @@ copyPhysReg(MBB, MBBI, DL, DestRegHi, SrcReg, false); BuildMI(MBB, MBBI, DL, get(SystemZ::VREPG), DestRegLo) .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1); + PREGCOPIES++; return; } @@ -878,6 +875,7 @@ BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); + PREGCOPIES++; } void SystemZInstrInfo::storeRegToStackSlot( @@ -1301,6 +1299,8 @@ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); return true; + // Selecting LOCRMux here is not strictly necessary but otherwise they + // would *all* become branch sequences without optimized regalloc. case SystemZ::LOCRMux: expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); return true; @@ -1385,6 +1385,7 @@ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); return true; + // Expand CHIMux/CFIMux here when optimized regalloc is disabled. 
case SystemZ::CHIMux: expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false); return true; @@ -1815,3 +1816,188 @@ return false; } + +unsigned SystemZInstrInfo::getThreeOperandOpcode(unsigned Opcode) const { + return SystemZ::getThreeOperandOpcode(Opcode); +} + +SystemZII::MuxInfo::MuxInfo(MachineInstr &mi_, + const TargetRegisterInfo *TRI) : MI(&mi_) { + PseudoOpcode = mi_.getOpcode(); + NumOps = (MI->isCompare() ? 2 : 3); + MBB = MI->getParent(); + DL = MI->getDebugLoc(); + for (unsigned OpIdx = 0; OpIdx < NumOps; ++OpIdx) { + Regs[OpIdx] = MI->getOperand(OpIdx).getReg(); + assert(SystemZ::GRX32BitRegClass.contains(Regs[OpIdx]) && + "Expected a GRX32 register operand."); + unsigned SubRegIdx = (SystemZ::GR32BitRegClass.contains(Regs[OpIdx]) ? + SystemZ::subreg_l32 : SystemZ::subreg_h32); + Regs64[OpIdx] = TRI->getMatchingSuperReg(Regs[OpIdx], SubRegIdx, + &SystemZ::GR64BitRegClass); + IsKill[OpIdx] = MI->getOperand(OpIdx).isKill(); + } + Rotated = false; + Commuted = false; + DstRegUsed = false; +} + +void SystemZII::MuxInfo::useDstRegForOp(unsigned Idx, + const SystemZInstrInfo *TII) { + TII->copyPhysReg(*MBB, MI, DL, Regs[0], Regs[Idx], IsKill[Idx]); + unsigned ReplacedReg = Regs[Idx]; + for (unsigned OpIdx = 1; OpIdx < NumOps; ++OpIdx) + if (Regs[OpIdx] == ReplacedReg) { + Regs[OpIdx] = Regs[0]; + Regs64[OpIdx] = Regs64[0]; + IsKill[OpIdx] = true; + } + MuxCopyDst++; + DstRegUsed = true; +} + +void SystemZII::MuxInfo::RotateReg0(const TargetRegisterInfo *TRI, + const SystemZInstrInfo *TII) { + // Put the second rotation after MI. + MachineBasicBlock::iterator MBBI = MI; + MachineBasicBlock::iterator InsPos = (Rotated ? std::next(MBBI) : MBBI); + BuildMI(*MBB, InsPos, DL, TII->get(SystemZ::RLLG), Regs64[0]) + .addReg(Regs64[0]).addReg(0).addImm(32); + unsigned Reg = Regs[0]; + unsigned OtherSubRegIdx = + isLOW(0) ? 
SystemZ::subreg_h32 : SystemZ::subreg_l32; + unsigned OtherReg = TRI->getSubReg(Regs64[0], OtherSubRegIdx); + for (unsigned Idx = 0; Idx < NumOps; ++Idx) { + if (Regs[Idx] == Reg) { + Regs[Idx] = OtherReg; + IsKill[Idx] = true; + } else if (Regs[Idx] == OtherReg) + Regs[Idx] = Reg; + } + Rotated = true; +} + +void SystemZII::MuxInfo::negateRHSIfSub(const SystemZInstrInfo *TII) { + switch(PseudoOpcode) { + case SystemZ::SRMux: + case SystemZ::SRMuxK: + PseudoOpcode = SystemZ::ARMux; + break; + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + PseudoOpcode = SystemZ::ALRMux; + break; + default: + return; + } + if (isLOW(2)) { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LNR), Regs[2]) + .addReg(Regs[2], getKillRegState(true)); + } else { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::XIHF), Regs[2]) + .addReg(Regs[2], getKillRegState(true)) + .addImm(0xffffffff); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::AIH), Regs[2]) + .addReg(Regs[2], getKillRegState(true)) + .addImm(1); + } +} + +void SystemZII::MuxInfo::commute(const SystemZInstrInfo *TII) { + negateRHSIfSub(TII); + + unsigned LHS = (NumOps == 3 ? 1 : 0); + unsigned RHS = (NumOps == 3 ? 2 : 1); + unsigned RegTmp = Regs[RHS]; + Regs[RHS] = Regs[LHS]; + Regs[LHS] = RegTmp; + RegTmp = Regs64[RHS]; + Regs64[RHS] = Regs64[LHS]; + Regs64[LHS] = RegTmp; + bool IsKillTmp = IsKill[RHS]; + IsKill[RHS] = IsKill[LHS]; + IsKill[LHS] = IsKillTmp; + Commuted = true; +} + +bool SystemZII::MuxInfo::selectAddSubMux(const SystemZInstrInfo *TII, + SlotIndexes *SI) { + unsigned LowOpcode, HiOpcode, HiHiLoOpcode; + getAddSubMuxOpcodes(LowOpcode, HiOpcode, HiHiLoOpcode, PseudoOpcode); + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1) && isLOW(2)) + Opcode = (Regs[0] == Regs[1] ? 
+ LowOpcode : TII->getThreeOperandOpcode(LowOpcode)); + else if (isHIGH(0) && isHIGH(1) && isHIGH(2)) + Opcode = HiOpcode; + else if (isHIGH(0) && isHIGH(1) && isLOW(2)) + Opcode = HiHiLoOpcode; + else + return false; + MachineInstr *NewMI = BuildMI(*MBB, MI, DL, TII->get(Opcode), Regs[0]) + .addReg(Regs[1], getKillRegState(IsKill[1])) + .addReg(Regs[2], getKillRegState(IsKill[2])); + if (SI != nullptr) + SI->replaceMachineInstrInMaps(*MI, *NewMI); + MI->eraseFromParent(); + MI = NewMI; + return true; +} + +bool SystemZII::MuxInfo::selectCmpMux(const SystemZInstrInfo *TII) { + unsigned LowOpcode, HiOpcode, HiLowOpcode; + getCmpMuxOpcodes(LowOpcode, HiOpcode, HiLowOpcode, PseudoOpcode); + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1)) + Opcode = LowOpcode; + else if (isHIGH(0) && isHIGH(1)) + Opcode = HiOpcode; + else if (isHIGH(0) && isLOW(1)) + Opcode = HiLowOpcode; + else + return false; + if (Commuted || Rotated) + for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { + MachineOperand &MO = MI->getOperand(OpIdx); + MO.setReg(Regs[OpIdx]); + MO.setIsKill(IsKill[OpIdx]); + } + MI->setDesc(TII->get(Opcode)); + return true; +} + +void SystemZII::MuxInfo::getAddSubMuxOpcodes(unsigned &LowOpcode, + unsigned &HiOpcode, unsigned &HiHiLoOpcode, unsigned Opc) const { + if (Opc == SystemZ::ARMux || Opc == SystemZ::ARMuxK) { + LowOpcode = SystemZ::AR; + HiOpcode = SystemZ::AHHHR; + HiHiLoOpcode = SystemZ::AHHLR; + } else if (Opc == SystemZ::SRMux || Opc == SystemZ::SRMuxK) { + LowOpcode = SystemZ::SR; + HiOpcode = SystemZ::SHHHR; + HiHiLoOpcode = SystemZ::SHHLR; + } else if (Opc == SystemZ::ALRMux || Opc == SystemZ::ALRMuxK) { + LowOpcode = SystemZ::ALR; + HiOpcode = SystemZ::ALHHHR; + HiHiLoOpcode = SystemZ::ALHHLR; + } else if (Opc == SystemZ::SLRMux || Opc == SystemZ::SLRMuxK) { + LowOpcode = SystemZ::SLR; + HiOpcode = SystemZ::SLHHHR; + HiHiLoOpcode = SystemZ::SLHHLR; + } else + llvm_unreachable("Bad opcode."); +} + +void SystemZII::MuxInfo::getCmpMuxOpcodes(unsigned 
&LowOpcode, + unsigned &HiOpcode, unsigned &HiLowOpcode, unsigned Opc) const { + if (Opc == SystemZ::CRMux) { + LowOpcode = SystemZ::CR; + HiOpcode = SystemZ::CHHR; + HiLowOpcode = SystemZ::CHLR; + } else if (Opc == SystemZ::CLRMux) { + LowOpcode = SystemZ::CLR; + HiOpcode = SystemZ::CLHHR; + HiLowOpcode = SystemZ::CLHLR; + } else + llvm_unreachable("Bad opcode."); +} Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -893,6 +893,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of a register. let isCommutable = 1 in { + // XXX COMMENTs for these new muxes! + defm ARMux : BinaryRRAndKPseudo<"armux", z_sadd, GRX32, GRX32, GRX32>; defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_sadd, GR64, GR64>; } @@ -935,6 +937,7 @@ let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { + defm ALRMux : BinaryRRAndKPseudo<"alrmux", z_uadd, GRX32, GRX32, GRX32>; defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uadd, GR64, GR64>; } @@ -994,6 +997,7 @@ // Subtraction producing a signed overflow flag. let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of a register. + defm SRMux : BinaryRRAndKPseudo<"srmux", z_ssub, GRX32, GRX32, GRX32>; defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssub, GR64, GR64>; @@ -1035,6 +1039,7 @@ // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. 
+ defm SLRMux : BinaryRRAndKPseudo<"slrmux", z_usub, GRX32, GRX32, GRX32>; defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usub, GR64, GR64>; @@ -1420,7 +1425,10 @@ // some of the signed forms have COMPARE AND BRANCH equivalents whereas none // of the unsigned forms do. let Defs = [CC], CCValues = 0xE in { - // Comparison with a register. + // Comparison with a register. CRMux expands to CR, CHHR or CHLR, + // depending on the choice of register. + def CRMux : CompareRREPseudo, + Requires<[FeatureHighWord]>; def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>; def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>; @@ -1472,7 +1480,10 @@ // Unsigned comparisons. let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { - // Comparison with a register. + // Comparison with a register. CLRMux expands to CLR, CLHHR or CLHLR, + // depending on the choice of register. + def CLRMux : CompareRREPseudo, + Requires<[FeatureHighWord]>; def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; Index: lib/Target/SystemZ/SystemZRegisterInfo.h =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.h +++ lib/Target/SystemZ/SystemZRegisterInfo.h @@ -56,6 +56,10 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap *VRM) const override; + // Override TargetRegisterInfo.h. 
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
     return true;
Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -21,6 +21,8 @@
 #define GET_REGINFO_TARGET_DESC
 #include "SystemZGenRegisterInfo.inc"
 
+#define DEBUG_TYPE "regalloc"
+
 SystemZRegisterInfo::SystemZRegisterInfo()
     : SystemZGenRegisterInfo(SystemZ::R14D) {}
 
@@ -73,6 +75,19 @@
       Hints.push_back(Reg);
 }
 
+// EXPERIMENTAL: make the extra high/low hints for Mux pseudos hard or soft.
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> HARDHIORLO("hard-hi-or-lo", cl::init(false));
+static cl::opt<bool> SOFTHIORLO("soft-hi-or-lo", cl::init(false));
+
+// True if CC is live into any successor of MBB. XXX copied twice now; share it?
+static bool isCCLiveOut(MachineBasicBlock &MBB) {
+  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
+    if ((*SI)->isLiveIn(SystemZ::CC))
+      return true;
+  return false;
+}
+
 bool
 SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                            ArrayRef<MCPhysReg> Order,
@@ -81,59 +96,233 @@
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
   const MachineRegisterInfo *MRI = &MF.getRegInfo();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
 
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
   if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) {
+    // Compare/Add/Sub "High" Muxes. Not all combinations are supported by
+    // the target.
+    struct Reg2RCMap : std::map<unsigned, const TargetRegisterClass *> {
+      unsigned VirtReg_; // Register being hinted; set by reset().
+      bool Change; // Set when constrainReg() updates a map entry.
+      unsigned RequiredHigh; // Hard votes for GRH32. USEFUL?
+      unsigned RequiredLow; // Hard votes for GR32.
+      unsigned RequiredHigh_soft; // Soft votes for GRH32.
+      unsigned RequiredLow_soft; // Soft votes for GR32.
+      std::set<MachineInstr *> CountedMIs; // MIs already counted. No improvement...(?)
+      void reset(unsigned Reg) {
+        VirtReg_ = Reg;
+        Change = false;
+        RequiredHigh = 0;
+        RequiredLow = 0;
+        RequiredHigh_soft = 0;
+        RequiredLow_soft = 0;
+        CountedMIs.clear();
+      }
+      void findRC(MachineOperand &MO,
+                  const VirtRegMap *VRM,
+                  const MachineRegisterInfo *MRI) {
+        if (find(MO.getReg()) != end())
+          return;
+        const TargetRegisterClass *RC = getRC32(MO, VRM, MRI);
+        (*this)[MO.getReg()] = RC;
+      }
+      void constrainReg(unsigned Reg, const TargetRegisterClass *RC,
+                        MachineInstr *MI, bool HardHint = true) {
+        if (Reg == VirtReg_) {
+          if (!CountedMIs.count(MI)) {
+            if (HardHint) {
+              if (RC == &SystemZ::GR32BitRegClass)
+                RequiredLow++;
+              else
+                RequiredHigh++;
+            } else {
+              if (RC == &SystemZ::GR32BitRegClass)
+                RequiredLow_soft++;
+              else
+                RequiredHigh_soft++;
+            }
+            CountedMIs.insert(MI);
+          }
+        } else if (HardHint) {
+          if ((*this)[Reg] != &SystemZ::GRX32BitRegClass) {
+            if ((*this)[Reg] != nullptr && (*this)[Reg] != RC) {
+              // Don't give preference if the opposite RC is also needed.
+              (*this)[Reg] = nullptr;
+              Change = true;
+            }
+          }
+          else {
+            (*this)[Reg] = RC;
+            Change = true;
+          }
+        }
+      }
+    } Reg2RC;
+    // Search all use/def connected instructions iteratively to propagate the
+    // requirements of registers being in either GR32 or GR32H. At the end,
+    // pass hints for VirtReg if either GR32 or GR32H seems preferred.
+    // XXX: Make sure not to revisit same instruction multiple times?
+    const TargetRegisterClass *LOW = &SystemZ::GR32BitRegClass;
+    const TargetRegisterClass *HIGH = &SystemZ::GRH32BitRegClass;
     SmallVector<unsigned, 8> Worklist;
     SmallSet<unsigned, 4> DoneRegs;
-    Worklist.push_back(VirtReg);
-    while (Worklist.size()) {
-      unsigned Reg = Worklist.pop_back_val();
-      if (!DoneRegs.insert(Reg).second)
-        continue;
-
-      for (auto &Use : MRI->use_instructions(Reg)) {
-        // For LOCRMux, see if the other operand is already a high or low
-        // register, and in that case give the correpsonding hints for
-        // VirtReg. LOCR instructions need both operands in either high or
-        // low parts.
-        if (Use.getOpcode() == SystemZ::LOCRMux) {
-          MachineOperand &TrueMO = Use.getOperand(1);
-          MachineOperand &FalseMO = Use.getOperand(2);
-          const TargetRegisterClass *RC =
-            TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
-                                   getRC32(TrueMO, VRM, MRI));
-          if (RC && RC != &SystemZ::GRX32BitRegClass) {
-            addHints(Order, Hints, RC, MRI);
-            // Return true to make these hints the only regs available to
-            // RA. This may mean extra spilling but since the alternative is
-            // a jump sequence expansion of the LOCRMux, it is preferred.
-            return true;
+    Reg2RC.Change = true;
+    while (Reg2RC.Change) {
+      Reg2RC.reset(VirtReg);
+      Worklist.clear();
+      Worklist.push_back(VirtReg);
+      DoneRegs.clear();
+      while (Worklist.size()) {
+        unsigned Reg = Worklist.pop_back_val();
+        if (!DoneRegs.insert(Reg).second)
+          continue;
+        for (MachineInstr &MI : MRI->reg_nodbg_instructions(Reg)) {
+          unsigned Regs[3] = {0, 0, 0};
+          unsigned NumHi = 0;
+          unsigned NumLo = 0;
+          unsigned OpI = 0;
+          auto processOperands = [&]() -> void {
+            for (; OpI < MI.getNumOperands(); ++OpI) {
+              MachineOperand &MO = MI.getOperand(OpI);
+              if (!MO.isReg() || MO.isImplicit())
+                break;
+              assert(OpI < 3 && "Index out of range.");
+              Regs[OpI] = MO.getReg();
+              Reg2RC.findRC(MO, VRM, MRI);
+              // Add GRX32 registers to worklist
+              if (Reg2RC[MO.getReg()] == &SystemZ::GRX32BitRegClass)
+                Worklist.push_back(MO.getReg());
+              else if (Reg2RC[MO.getReg()] == HIGH)
+                NumHi++;
+              else if (Reg2RC[MO.getReg()] == LOW)
+                NumLo++;
+            }
+          };
+          auto addHiOrLoHints = [&](bool OnlyExtra) -> void {
+            if (!HARDHIORLO && !SOFTHIORLO && OnlyExtra)
+              return;
+            const TargetRegisterClass *RC = nullptr;
+            if (NumHi > 0 && NumLo == 0)
+              RC = HIGH;
+            else if (NumLo > 0 && NumHi == 0)
+              RC = LOW;
+            bool HintHard = (OnlyExtra ? HARDHIORLO : true);
+            if (RC != nullptr)
+              for (unsigned Idx = 0; Idx < 3 && Regs[Idx]; ++Idx) // Bound first.
+                Reg2RC.constrainReg(Regs[Idx], RC, &MI, HintHard);
+          };
+
+          switch (MI.getOpcode()) {
+          case SystemZ::CRMux:
+          case SystemZ::CLRMux: {
+            processOperands();
+            // Avoid needing to rotate the LH case
+            if (isCCLiveOut(*MI.getParent())) {
+              // L_ -> LL
+              if (Reg2RC[Regs[0]] == LOW)
+                Reg2RC.constrainReg(Regs[1], LOW, &MI);
+              // _H -> HH
+              else if (Reg2RC[Regs[1]] == HIGH)
+                Reg2RC.constrainReg(Regs[0], HIGH, &MI);
+            }
+            addHiOrLoHints(true/*OnlyExtra*/);
+            break;
           }
-          // Add the other operand of the LOCRMux to the worklist.
-          unsigned OtherReg =
-            (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
-          if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass)
-            Worklist.push_back(OtherReg);
-        } // end LOCRMux
-        else if (Use.getOpcode() == SystemZ::CHIMux ||
-                 Use.getOpcode() == SystemZ::CFIMux) {
-          if (Use.getOperand(1).getImm() == 0) {
-            bool OnlyLMuxes = true;
-            for (MachineInstr &DefMI : MRI->def_instructions(VirtReg))
-              if (DefMI.getOpcode() != SystemZ::LMux)
-                OnlyLMuxes = false;
-            if (OnlyLMuxes) {
-              addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
-              // Return false to make these hints preferred but not obligatory.
-              return false;
+          case SystemZ::SRMux:
+          case SystemZ::SRMuxK:
+          case SystemZ::SLRMux:
+          case SystemZ::SLRMuxK:
+            processOperands();
+            // H__ -> HH_
+            if (Reg2RC[Regs[0]] == HIGH) {
+              Reg2RC.constrainReg(Regs[1], HIGH, &MI);
+              break;
+            }
+            // _L_ -> LLL
+            if (Reg2RC[Regs[1]] == LOW) {
+              Reg2RC.constrainReg(Regs[0], LOW, &MI);
+              Reg2RC.constrainReg(Regs[2], LOW, &MI);
+              break;
+            }
+            LLVM_FALLTHROUGH;
+          case SystemZ::ARMux:
+          case SystemZ::ARMuxK:
+          case SystemZ::ALRMux:
+          case SystemZ::ALRMuxK:
+            processOperands();
+            // L__ -> LLL
+            if (Reg2RC[Regs[0]] == LOW) {
+              Reg2RC.constrainReg(Regs[1], LOW, &MI);
+              Reg2RC.constrainReg(Regs[2], LOW, &MI);
+            }
+            // _LL -> LLL
+            else if (Reg2RC[Regs[1]] == LOW && Reg2RC[Regs[2]] == LOW)
+              Reg2RC.constrainReg(Regs[0], LOW, &MI);
+            else if (Reg2RC[Regs[0]] == HIGH) {
+              // HL_ -> HLH
+              if (Reg2RC[Regs[1]] == LOW)
+                Reg2RC.constrainReg(Regs[2], HIGH, &MI);
+              // H_L -> HHL
+              else if (Reg2RC[Regs[2]] == LOW)
+                Reg2RC.constrainReg(Regs[1], HIGH, &MI);
+              // Hrr -> HHH
+              else if (Regs[1] == Regs[2])
+                Reg2RC.constrainReg(Regs[1], HIGH, &MI);
             }
+            // _H? -> HH?
+            else if (Reg2RC[Regs[1]] == HIGH || Reg2RC[Regs[2]] == HIGH)
+              Reg2RC.constrainReg(Regs[0], HIGH, &MI);
+            addHiOrLoHints(true/*OnlyExtra*/);
+            break;
+
+          case SystemZ::LOCRMux: {
+            processOperands();
+            addHiOrLoHints(false/*OnlyExtra*/);
+            break;
+          }
+
+          case SystemZ::CHIMux:
+          case SystemZ::CFIMux:
+            processOperands();
+            if (MI.getOperand(1).getImm() == 0) {
+              bool OnlyLMuxes = true;
+              for (MachineInstr &DefMI : MRI->def_instructions(Reg))
+                if (DefMI.getOpcode() != SystemZ::LMux)
+                  OnlyLMuxes = false;
+              if (OnlyLMuxes)
+                Reg2RC.constrainReg(Regs[0], LOW, &MI, false/*HardHint*/); // XXX Soft?
+            }
+            break;
+
+          // case SystemZ::COPY:
+          //   if (MI.getOperand(1).getReg() == Reg)
+          //     Worklist.push_back(MI.getOperand(0).getReg()); // USEFUL?
+          //   break;
+
+          default: break;
           }
-        } // end CHIMux / CFIMux
+        } // Reg
+      } // Worklist
+    } // Change
+    // Hard votes restrict RA to the hinted class; soft votes only prefer it.
+    if ((Reg2RC.RequiredHigh > Reg2RC.RequiredLow) && Reg2RC.RequiredLow == 0) {
+      addHints(Order, Hints, &SystemZ::GRH32BitRegClass, MRI);
+      return true;
+    } else if ((Reg2RC.RequiredLow > Reg2RC.RequiredHigh) && Reg2RC.RequiredHigh == 0) {
+      addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
+      return true;
+    } else if (!Reg2RC.RequiredLow) {
+      if ((Reg2RC.RequiredHigh_soft > Reg2RC.RequiredLow_soft) &&
+          Reg2RC.RequiredLow_soft == 0) {
+        addHints(Order, Hints, &SystemZ::GRH32BitRegClass, MRI);
+        return false;
+      } else if ((Reg2RC.RequiredLow_soft > Reg2RC.RequiredHigh_soft) &&
+                 Reg2RC.RequiredHigh_soft == 0) {
+        addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
+        return false;
       }
     }
   }
@@ -141,6 +330,42 @@
   return BaseImplRetVal;
 }
 
+bool SystemZRegisterInfo::allowHintRecoloring(unsigned VReg,
+                                              unsigned NewPhysReg,
+                                              const VirtRegMap *VRM) const {
+  unsigned CurrPhys = VRM->getPhys(VReg);
+  if (!SystemZ::GRX32BitRegClass.contains(NewPhysReg))
+    return true;
+  assert(SystemZ::GRX32BitRegClass.contains(CurrPhys) && "Expected GRX32 reg.");
+  if (SystemZ::GR32BitRegClass.contains(CurrPhys) ==
+      SystemZ::GR32BitRegClass.contains(NewPhysReg))
+    return true;
+  // XXX This assumes that nearly all Muxes end up with legal reg assignments.
+
+  // This is a GRX32 register that has changed between low and high parts.
+  MachineRegisterInfo *MRI = &VRM->getRegInfo();
+  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(VReg))
+    switch (MI.getOpcode()) {
+    case SystemZ::CRMux:
+    case SystemZ::CLRMux:
+    case SystemZ::SRMuxK:
+    case SystemZ::SLRMuxK:
+    case SystemZ::SRMux:
+    case SystemZ::SLRMux:
+    case SystemZ::ARMux:
+    case SystemZ::ARMuxK:
+    case SystemZ::ALRMux:
+    case SystemZ::ALRMuxK:
+      LLVM_DEBUG(dbgs() << "SystemZ: Avoiding replacing "
+                 << getName(CurrPhys) << " with "
+                 << getName(NewPhysReg) << " in "; MI.dump());
+      return false;
+    default: break;
+    }
+
+  return true;
+}
+
 const MCPhysReg *
 SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
Index: lib/Target/SystemZ/SystemZScheduleZ13.td
===================================================================
--- lib/Target/SystemZ/SystemZScheduleZ13.td
+++ lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -366,6 +366,7 @@
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
 def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
@@ -397,6 +398,7 @@
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
 def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
 def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>;
 
 // Subtraction with borrow
 def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
@@ -547,6 +549,7 @@
 def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
 def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
 def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+def : InstRW<[WLat1, FXb, NormalGr],
(instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ14.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ14.td +++ lib/Target/SystemZ/SystemZScheduleZ14.td @@ -367,6 +367,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -398,6 +399,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -557,6 +559,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ196.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ196.td +++ lib/Target/SystemZ/SystemZScheduleZ196.td @@ -326,6 +326,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex 
"A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -359,6 +360,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -508,6 +510,7 @@ def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], Index: lib/Target/SystemZ/SystemZScheduleZEC12.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZEC12.td +++ lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -337,6 +337,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -370,6 +371,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -519,6 +521,7 @@ def : InstRW<[WLat1LSU, 
FXU, LSU, NormalGr], (instregex "CLRL$")>;
 def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>;
 def : InstRW<[WLat2, FXU, NormalGr], (instregex "C(L)?HLR$")>;
+def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>;
 
 // Compare halfword
 def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CH(Y)?$")>;
Index: lib/Target/SystemZ/SystemZSelectMux.cpp
===================================================================
--- /dev/null
+++ lib/Target/SystemZ/SystemZSelectMux.cpp
@@ -0,0 +1,214 @@
+//==------- SystemZSelectMux.cpp - Select Mux instructions --------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that is run immediately after VirtRegRewriter
+// and before MachineCopyPropagation. The purpose is to set the opcodes of
+// GRX32 Mux pseudos to match the choice of registers which have been
+// carefully hinted during register allocation. It is important to do this
+// before any later pass might substitute a register for another without
+// knowledge of the actual legal combinations of high/low register
+// assignments.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_SELECT_MUX_NAME "SystemZ Mux pseudo selection pass"
+
+#define DEBUG_TYPE "systemz-selectmux"
+STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
+STATISTIC(LOCRs, "Number of LOCRs");
+STATISTIC(MUXLOWs, "Number of Muxes using only low parts");
+STATISTIC(MUXHIGHs, "Number of Muxes using only high parts");
+STATISTIC(MUXHILOs, "Number of Muxes using high and low parts");
+STATISTIC(BADMUXs, "Number of Muxes ending up in illegal registers");
+
+namespace llvm {
+  void initializeSystemZSelectMuxPass(PassRegistry&);
+}
+
+namespace {
+/// Rewrites the opcodes of GRX32 Mux pseudos right after virtual registers
+/// have been rewritten, based on the physical registers that were assigned.
+class SystemZSelectMux : public MachineFunctionPass {
+public:
+  static char ID;
+  SystemZSelectMux() : MachineFunctionPass(ID) {
+    initializeSystemZSelectMuxPass(*PassRegistry::getPassRegistry());
+  }
+
+  const SystemZInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  SlotIndexes *SI;
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  StringRef getPassName() const override { return SYSTEMZ_SELECT_MUX_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequiredTransitive<SlotIndexes>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveStacks>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  bool selectMBB(MachineBasicBlock &MBB);
+  bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+};
+char SystemZSelectMux::ID = 0;
+}
+
+INITIALIZE_PASS(SystemZSelectMux, "systemz-select-mux",
+                SYSTEMZ_SELECT_MUX_NAME, false, false)
+
+/// Returns an instance of the Select Mux pass.
+FunctionPass *llvm::createSystemZSelectMuxPass(SystemZTargetMachine &TM) {
+  return new SystemZSelectMux();
+}
+
+/// If MBBI references a Mux pseudo that can be selected here, rewrite it and
+/// return true. Return false if the instruction was left unchanged.
+bool SystemZSelectMux::selectMI(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
+  case SystemZ::LOCRMux: {
+    SystemZII::MuxInfo MuxI(MI, TRI);
+    if (MuxI.isLOW(0) && MuxI.isLOW(2))
+      MI.setDesc(TII->get(SystemZ::LOCR));
+    else if (MuxI.isHIGH(0) && MuxI.isHIGH(2))
+      MI.setDesc(TII->get(SystemZ::LOCFHR));
+    else
+      // If we were unable to implement the pseudo with a single instruction,
+      // we need to convert it back into a branch sequence. This cannot be
+      // done here since this pass is declared to preserve the CFG, so the
+      // pseudo is left in place and the change is deferred to the
+      // SystemZExpandPseudo pass.
+      LOCRMuxJumps++;
+    bool Selected = MI.getOpcode() != SystemZ::LOCRMux;
+    if (Selected)
+      LOCRs++;
+    return Selected;
+  }
+
+  case SystemZ::CHIMux: {
+    bool IsHigh = SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg());
+    MI.setDesc(TII->get(IsHigh ? SystemZ::CIH :SystemZ::CHI));
+    return true;
+  }
+
+  case SystemZ::CFIMux: {
+    bool IsHigh = SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg());
+    MI.setDesc(TII->get(IsHigh ? SystemZ::CIH :SystemZ::CFI));
+    return true;
+  }
+
+  case SystemZ::CRMux:
+  case SystemZ::CLRMux: {
+    // MI is a 32 bit compare with register pseudo instruction. Replace it
+    // with one of the three possible opcodes. "LowHi" is not supported and
+    // must therefore be swapped (in SystemZExpandPseudo.cpp).
+    bool Legal = true;
+    SystemZII::MuxInfo MuxI(MI, TRI);
+    if (MuxI.isLOW(0) && MuxI.isLOW(1))
+      MUXLOWs++;
+    else if (MuxI.isHIGH(0) && MuxI.isHIGH(1))
+      MUXHIGHs++;
+    else if (MuxI.isHIGH(0) && MuxI.isLOW(1))
+      MUXHILOs++;
+    else { // LowHi // XXX Try commute?
+      BADMUXs++;
+      Legal = false;
+    }
+    bool Success = MuxI.selectCmpMux(TII);
+    (void)Legal; // Only read by the assert; avoids a warning without asserts.
+    assert(Legal == Success && "MuxInfo broken?");
+    return Success;
+  }
+
+  case SystemZ::ARMux:
+  case SystemZ::ARMuxK:
+  case SystemZ::ALRMux:
+  case SystemZ::ALRMuxK:
+  case SystemZ::SRMux:
+  case SystemZ::SRMuxK:
+  case SystemZ::SLRMux:
+  case SystemZ::SLRMuxK: {
+    bool IsAdd = MI.isCommutable();
+    unsigned OldBadMuxStat = BADMUXs;
+    SystemZII::MuxInfo MuxI(MI, TRI);
+    if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isLOW(2))
+      MUXLOWs++;
+    else if (MuxI.isHIGH(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2))
+      MUXHIGHs++;
+    else if (MuxI.isHIGH(0) && (MuxI.isHIGH(1) || MuxI.isHIGH(2))) {
+      if (MuxI.isHIGH(2)) { // HLH is unsupported.
+        if (IsAdd) {
+          MuxI.commute(TII);
+          MUXHILOs++;
+        }
+        else
+          BADMUXs++;
+      } else
+        MUXHILOs++;
+    }
+    else
+      BADMUXs++;
+
+    bool Success = MuxI.selectAddSubMux(TII, SI);
+    (void)OldBadMuxStat; // Only read by the assert below.
+    assert(Success == (OldBadMuxStat == BADMUXs) && "MuxInfo broken?");
+    return Success;
+  }
+
+  default:
+    break;
+  }
+
+  return false;
+}
+
+/// Iterate over the instructions in basic block MBB and select any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZSelectMux::selectMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= selectMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool SystemZSelectMux::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  SI = &getAnalysis<SlotIndexes>();
+  TRI = MF.getSubtarget().getRegisterInfo();
+
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= selectMBB(MBB);
+  return Modified;
+}
+
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -182,6 +182,7 @@
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
+  void addPostRewrite() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
 };
@@ -211,6 +212,10 @@
   return true;
 }
 
+void SystemZPassConfig::addPostRewrite() {
+  addPass(createSystemZSelectMuxPass(getSystemZTargetMachine()));
+}
+
 void SystemZPassConfig::addPreSched2() {
   addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
 
Index: test/CodeGen/SystemZ/debuginstr-00.mir
===================================================================
--- test/CodeGen/SystemZ/debuginstr-00.mir
+++ test/CodeGen/SystemZ/debuginstr-00.mir
@@ -63,7 +63,7 @@
   bb.1.bb2:
     liveins: $r0l
 
-    CHIMux renamable $r0l, 0, implicit-def $cc
+    CHI renamable $r0l, 0, implicit-def $cc
     BRC 14, 6, %bb.1, implicit killed $cc
 
   bb.2.bb3.preheader:
Index: test/CodeGen/SystemZ/expand-mux-pseudos.mir
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/expand-mux-pseudos.mir
@@ -0,0 +1,305 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -start-after=postrapseudos \
+# RUN: -verify-machineinstrs
-enable-post-misched=false %s -o - | FileCheck %s +# +# Test that SystemZExpandPseudo can handle all combinations of high/low +# registers and transform all Add / Sub Mux instructions into legal target +# instructions. + +name: fun0 +body: | + bb.0: + successors: %bb.1 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun0 + ; CHECK: llihl %r2, 0 + + ;; ARMuxK + + ; LLL + renamable $r2l = ARMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ark %r2, %r1, %r0 + renamable $r2l = ARMuxK renamable $r2l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ar %r2, %r0 + + ; LLH + renamable $r2l = ARMuxK renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ark %r2, %r1, %r2 + renamable $r2l = ARMuxK renamable $r2l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r1 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHL + renamable $r2l = ARMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ar %r2, %r1 + renamable $r2l = ARMuxK renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHH + renamable $r2l = ARMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r1, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r0h, implicit-def dead $cc + 
; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: risbhg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r0, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: ar %r2, %r2 + + ; HLL + renamable $r2h = ARMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r0 + renamable $r2h = ARMuxK renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r2 + + ; HLH + renamable $r2h = ARMuxK renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r0, %r1 + renamable $r2h = ARMuxK renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r2, %r1 + renamable $r2h = ARMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + + ; HHL + renamable $r2h = ARMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r1, %r1 + ; HHH + renamable $r2h = ARMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: ahhhr %r2, %r1, %r0 + + ;; SRMuxK + + ; LLL + renamable $r2l = SRMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: srk %r2, %r1, %r0 + renamable $r2l = SRMuxK renamable $r2l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: sr %r2, %r0 + + ; LLH + renamable $r2l = SRMuxK renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: srk %r2, %r1, %r2 + renamable $r2l = SRMuxK renamable $r2l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r1 + ; CHECK-NEXT: 
rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHL + renamable $r2l = SRMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: sr %r2, %r1 + renamable $r2l = SRMuxK renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: lnr %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHH + renamable $r2l = SRMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r1, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: risbhg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhlr %r2, %r0, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: sr %r2, %r2 + + ; HLL + renamable $r2h = SRMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhlr %r2, %r2, %r0 + renamable $r2h = SRMuxK renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r2 + + ; HLH + renamable $r2h = SRMuxK renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: 
shhhr %r2, %r2, %r0 + renamable $r2h = SRMuxK renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: srk %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2h = SRMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: xihf %r2, 4294967295 + ; CHECK-NEXT: aih %r2, 1 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + + ; HHL + renamable $r2h = SRMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: shhlr %r2, %r1, %r1 + + ; HHH + renamable $r2h = SRMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: shhhr %r2, %r1, %r0 + + bb.1: + successors: + +... + +# Test comparisons. + +--- +name: fun1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun1 + ; CHECK: iihf %r2, 0 + + CRMux renamable $r1l, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.2, implicit killed $cc + ; CHECK-NEXT: crjl %r1, %r2, .LBB1_2 + + bb.1: + successors: + $r0l = LHI 0 + + bb.2: + successors: %bb.3, %bb.4 + liveins: $r1h, $r2l + + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.4, implicit killed $cc + ; CHECK-LABEL: .LBB1_2: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_4 + + bb.3: + successors: + $r0l = LHI 0 + + bb.4: + successors: %bb.5, %bb.6 + liveins: $r1h, $r2h + + CRMux renamable $r1h, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.6, implicit killed $cc + ; CHECK-LABEL: .LBB1_4: + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_6 + + bb.5: + successors: + $r0l = LHI 0 + + bb.6: + successors: %bb.7, %bb.8 + liveins: $r1l, $r2h + + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.8, implicit killed $cc + ; CHECK-LABEL: .LBB1_6: + ; CHECK-NEXT: chlr %r2, %r1 + ; CHECK-NEXT: jh .LBB1_8 + + bb.7: + successors: + $r0l = LHI 0 + + bb.8: + liveins: $r1l, $r2h + + CRMux renamable $r1l, 
renamable $r2h, implicit-def $cc + ; CHECK-LABEL: .LBB1_8: + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: jl .LBB1_10 + + bb.9: + successors: %bb.10, %bb.11 + liveins: $cc + BRC 14, 4, %bb.11, implicit killed $cc + + bb.10: + successors: + $r0l = LHI 0 + + bb.11: + successors: %bb.12, %bb.13 + liveins: $r1l, $r1h, $r2l + + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.13, implicit killed $cc + ; CHECK-LABEL: .LBB1_10: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jl .LBB1_12 + + bb.12: + successors: + $r0l = LHI 0 + + bb.13: + successors: %bb.14, %bb.15 + liveins: $r1l, $r1h, $r2l + + CRMux renamable $r2l, renamable $r1h, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.15, implicit killed $cc + ; CHECK-LABEL: .LBB1_12: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jh .LBB1_14 + + bb.14: + successors: + $r0l = LHI 0 + + bb.15: + +...