Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" @@ -934,7 +935,8 @@ /// for removing the old instruction. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS = nullptr) const; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. @@ -1024,7 +1026,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const { + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const { return nullptr; } Index: include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- include/llvm/CodeGen/TargetPassConfig.h +++ include/llvm/CodeGen/TargetPassConfig.h @@ -382,6 +382,11 @@ return false; } + /// Add passes to be run immediately after virtual registers are rewritten + /// to physical registers. These passes may replace an MI with a new one, + /// but should preserve SlotIndexes while doing so. + virtual void addPostRewrite() { } + /// This method may be implemented by targets that want to run passes after /// register allocation pass pipeline but before prolog-epilog insertion. virtual void addPostRegAlloc() { } Index: include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- include/llvm/CodeGen/TargetRegisterInfo.h +++ include/llvm/CodeGen/TargetRegisterInfo.h @@ -813,6 +813,23 @@ const LiveRegMatrix *Matrix = nullptr) const; + + /// Allow the target to reject replacing the previously allocated phys reg + /// with \p NewPhysReg by returning false. This typically happens when the + /// target has taken care in getRegAllocationHints() to make only a subset + /// of the available registers available, and it is important not to lose + /// this restriction. A better solution would probably be to do as gcc does + /// and describe the legal operand assignment combinations at the + /// instruction level. On SystemZ, some 32-bit instructions can use either + /// the high or the low parts of 64-bit registers, but some operand + /// combinations are not possible. For instance, CRMux (Compare Register) + /// can be lowered to CR (LOW, LOW), CHHR (HIGH, HIGH) or CHLR (HIGH, LOW), + /// but (LOW, HIGH) is *not* supported. + virtual bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap *VRM) const { + return true; + } + /// A callback to allow target a chance to update register allocation hints /// when a register is "changed" (e.g. coalesced) to another register. /// e.g. On ARM, some virtual registers should target register pairs, Index: lib/CodeGen/InlineSpiller.cpp =================================================================== --- lib/CodeGen/InlineSpiller.cpp +++ lib/CodeGen/InlineSpiller.cpp @@ -837,7 +837,7 @@ MachineInstr *FoldMI = LoadMI ?
TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); if (!FoldMI) return false; Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -2966,6 +2966,13 @@ LLVM_DEBUG(dbgs() << "=> Not profitable.\n"); continue; } + + // Check with target for any special replacements to avoid. + if (!TRI->allowHintRecoloring(Reg, PhysReg, VRM)) { + LLVM_DEBUG(dbgs() << "=> Not suitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is // equal, we consider this is profitable because it may expose // more recoloring opportunities. Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -522,7 +522,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS) const { + LiveIntervals *LIS, + VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; for (unsigned OpIdx : Ops) Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore @@ -568,7 +569,7 @@ MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); } if (NewMI) { Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -1167,6 +1167,12 @@ addPass(&MachineSchedulerID); if (addRegAssignmentOptimized()) { + // Allow targets to expand pseudo instructions depending on the choice of + // registers. If special requirements are involved (see comment for + // allowHintRecoloring(), it is important that this is done before + // MachineCopyPropagation). + addPostRewrite(); + // Copy propagate to forward register uses and try to eliminate COPYs that // were not coalesced. addPass(&MachineCopyPropagationID); Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -158,7 +158,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// \returns true if a branch from an instruction with opcode \p BranchOpc /// bytes is capable of jumping to a position \p BrOffset bytes away. Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3023,7 +3023,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. 
Consider this instruction: // // %0 = COPY %sp; GPR64all:%0 Index: lib/Target/SystemZ/CMakeLists.txt =================================================================== --- lib/Target/SystemZ/CMakeLists.txt +++ lib/Target/SystemZ/CMakeLists.txt @@ -30,6 +30,7 @@ SystemZMCInstLower.cpp SystemZRegisterInfo.cpp SystemZSelectionDAGInfo.cpp + SystemZSelectMux.cpp SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp Index: lib/Target/SystemZ/SystemZ.h =================================================================== --- lib/Target/SystemZ/SystemZ.h +++ lib/Target/SystemZ/SystemZ.h @@ -194,6 +194,7 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZSelectMuxPass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm Index: lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- lib/Target/SystemZ/SystemZElimCompare.cpp +++ lib/Target/SystemZ/SystemZElimCompare.cpp @@ -35,6 +35,7 @@ #define DEBUG_TYPE "systemz-elim-compare" STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); +STATISTIC(LoadAndTests, "Number of load-and-test instructions"); STATISTIC(LoadAndTraps, "Number of load-and-trap instructions"); STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); @@ -103,14 +104,6 @@ } // end anonymous namespace -// Return true if CC is live out of MBB. -static bool isCCLiveOut(MachineBasicBlock &MBB) { - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; - return false; -} - // Returns true if MI is an instruction whose output equals the value in Reg. static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { switch (MI.getOpcode()) { @@ -302,6 +295,7 @@ MIB.setMemRefs(MI.memoperands()); MI.eraseFromParent(); + LoadAndTests++; return true; } @@ -598,7 +592,7 @@ // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. 
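// Note: the CC liveness test used in the line below is now the shared helper
// SystemZRegisterInfo::isCCLiveOut(), so that the same check can also be used
// by SystemZExpandPseudo (when expanding the compare Muxes) and by the
// register-allocation hinting code in SystemZRegisterInfo.cpp.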
- bool CompleteCCUsers = !isCCLiveOut(MBB); + bool CompleteCCUsers = !SystemZRegisterInfo::isCCLiveOut(MBB); SmallVector CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { Index: lib/Target/SystemZ/SystemZExpandPseudo.cpp =================================================================== --- lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -16,6 +16,7 @@ #include "SystemZ.h" #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -23,6 +24,13 @@ #define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass" +#define DEBUG_TYPE "systemz-expandpseudo" +STATISTIC(MuxRotate2, "Number of Mux pseudos needing two rotates of a reg."); +STATISTIC(MuxCmpSwap, "Number of Mux pseudo compares swapped."); +STATISTIC(MuxCmpNonSwappable, + "Number of Mux pseudo compares not swappable."); +STATISTIC(MuxCmpCCLiveOut, "Number of Mux pseudo compares with live out CC."); + namespace llvm { void initializeSystemZExpandPseudoPass(PassRegistry&); } @@ -36,17 +44,22 @@ } const SystemZInstrInfo *TII; + const TargetRegisterInfo *TRI; + const SystemZSubtarget *Subtarget; bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; } private: + bool updateLiveInLists_CC(MachineBasicBlock &MBB); bool expandMBB(MachineBasicBlock &MBB); bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandAddSubMux(MachineBasicBlock::iterator MBBI); + bool expandCmpMux(MachineBasicBlock::iterator MBBI); }; char SystemZExpandPseudo::ID = 0; } @@ -111,6 +124,142 @@ return true; } +bool SystemZExpandPseudo::expandAddSubMux(MachineBasicBlock::iterator MBBI) { + SystemZII::MuxInfo MuxI(*MBBI, TRI); + + // 1. See if legal already (after a free commute if needed). + if (MuxI.isAdd(TII) && MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.commute(TII); + if (MuxI.selectAddSubMux(TII)) + return true; + // 2. Use destination register if not used by any source operand and if it + // would make operands legal. + if (MuxI.differentRegs(0, 1) && MuxI.differentRegs(0, 2)) { + if (MuxI.isLOW(0)) { + if (MuxI.isLOW(1) && MuxI.isHIGH(2)) + MuxI.useDstRegForOp(2, TII); // LLH -> LLL + else if (MuxI.isHIGH(1) && (MuxI.isLOW(2) || MuxI.sameReg(1, 2))) + MuxI.useDstRegForOp(1, TII); // LHL/LH0H0 -> LLL + } + else if (MuxI.isLOW(1)) + MuxI.useDstRegForOp(1, TII); // HLX -> HHX + } + // 3. 
Handle the remaining cases + if (!MuxI.DstRegUsed) { + if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 1) && "Case with free dst reg already handled."); + MuxI.RotateReg0(TRI, TII); // LLH -> HHH + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isLOW(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + if (MuxI.sameReg64(0, 1)) + MuxI.commute(TII); // LHL -> LLH + MuxI.RotateReg0(TRI, TII); // LHL/LLH -> HHX + } + else if (MuxI.isLOW(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2)) { + MuxI.RotateReg0(TRI, TII); // LHH -> HHH/HLH/HHL/HLL + if (MuxI.sameReg64(0, 1)) + MuxI.useDstRegForOp(1, TII); // HLX -> HHH + } + else if (MuxI.isHIGH(0) && MuxI.isLOW(1) && MuxI.isHIGH(2)) { + assert(MuxI.sameReg(0, 2) && "Case with free dst reg already handled."); + assert(!MuxI.isAdd(TII) && "Add was handled earlier."); + if (MuxI.differentRegs64(0, 1)) + MuxI.RotateReg0(TRI, TII); // HLH -> LLL + else + MuxI.commute(TII); // HLH -> HHL + } + } + assert((MuxI.DstRegUsed || MuxI.Rotated || MuxI.Commuted) && + "Should have tried to handle all cases by now."); + + bool Success = MuxI.selectAddSubMux(TII); + assert(Success && "Expected a legal register combination by now."); + if (MuxI.Rotated) { + MuxI.RotateReg0(TRI, TII); + MuxRotate2++; + } + + return true; +} + +bool SystemZExpandPseudo::expandCmpMux(MachineBasicBlock::iterator MBBI) { + SystemZII::MuxInfo MuxI(*MBBI, TRI); + MachineBasicBlock *MBB = MBBI->getParent(); + + // 1. See if legal already + if (MuxI.selectCmpMux(TII)) + return true; + assert(MuxI.isLOW(0) && MuxI.isHIGH(1) && "Only Cmp Lo/Hi is illegal"); + + // 2. Try to swap the operands if possible. + if (!SystemZRegisterInfo::isCCLiveOut(*MBB)) { + bool Swappable = true; + SmallVector CCUsers; + for (MachineBasicBlock::iterator Itr = std::next(MBBI); + Itr != MBB->end(); ++Itr) { + if (Itr->readsRegister(SystemZ::CC)) { + unsigned Flags = Itr->getDesc().TSFlags; + if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast)) + CCUsers.push_back(&*Itr); + else { + Swappable = false; + MuxCmpNonSwappable++; + break; + } + } + if (Itr->definesRegister(SystemZ::CC)) + break; + } + if (Swappable) { + assert(CCUsers.size() && "No CC users found?"); + // Update all CC users. + for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) { + unsigned Flags = CCUsers[Idx]->getDesc().TSFlags; + unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ? + 0 : CCUsers[Idx]->getNumExplicitOperands() - 2); + MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1); + switch(CCMaskMO.getImm()) { + case SystemZ::CCMASK_CMP_LT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GT); + break; + case SystemZ::CCMASK_CMP_GT: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LT); + break; + case SystemZ::CCMASK_CMP_LE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_GE); + break; + case SystemZ::CCMASK_CMP_GE: + CCMaskMO.setImm(SystemZ::CCMASK_CMP_LE); + break; + case SystemZ::CCMASK_CMP_EQ: + case SystemZ::CCMASK_CMP_NE: + break; + default: + llvm_unreachable("Unexpected CCMask value."); + break; + } + } + // Swap the compare operands. + MuxI.commute(TII); + bool Success = MuxI.selectCmpMux(TII); + assert(Success && "Expected a legal register combination by now."); + MuxCmpSwap++; + return true; + } + } + else + MuxCmpCCLiveOut++; + + // 3. Use rotations. 
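// The operand swap above has to mirror the CC mask of every CC user, since
// reversing a comparison exchanges the "less than" and "greater than"
// outcomes while equality is unaffected. That mapping could be factored into
// a small helper; a minimal sketch (hypothetical name, assuming only the
// SystemZ::CCMASK_CMP_* constants already used in this file):
auto reverseCmpCCMask = [](unsigned CCMask) -> unsigned {
  switch (CCMask) {
  case SystemZ::CCMASK_CMP_LT: return SystemZ::CCMASK_CMP_GT;
  case SystemZ::CCMASK_CMP_GT: return SystemZ::CCMASK_CMP_LT;
  case SystemZ::CCMASK_CMP_LE: return SystemZ::CCMASK_CMP_GE;
  case SystemZ::CCMASK_CMP_GE: return SystemZ::CCMASK_CMP_LE;
  case SystemZ::CCMASK_CMP_EQ:
  case SystemZ::CCMASK_CMP_NE:
    return CCMask; // Equality tests are unaffected by swapping the operands.
  default:
    llvm_unreachable("Unexpected CCMask value.");
  }
};
// When swapping is not possible, the (LOW, HIGH) compare is instead bracketed
// by two RLLG rotations of operand 0's 64-bit super-register: the first moves
// the value into the other 32-bit half so that a legal compare can be used,
// and the second restores the register afterwards (counted by MuxRotate2).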
+ MuxI.RotateReg0(TRI, TII); + bool Success = MuxI.selectCmpMux(TII); + assert(Success && "Expected a legal register combination by now."); + MuxI.RotateReg0(TRI, TII); + MuxRotate2++; + return true; +} + /// If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false. bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB, @@ -120,6 +269,21 @@ switch (MI.getOpcode()) { case SystemZ::LOCRMux: return expandLOCRMux(MBB, MBBI, NextMBBI); + + case SystemZ::CRMux: + case SystemZ::CLRMux: + return expandCmpMux(MBBI); // XXX is it worth waiting with this? + + case SystemZ::ARMux: // XXX is it worth waiting with this? + case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + case SystemZ::SRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + return expandAddSubMux(MBBI); + default: break; } @@ -141,8 +305,45 @@ return Modified; } +bool SystemZExpandPseudo::updateLiveInLists_CC(MachineBasicBlock &MBB) { + if (MBB.isLiveIn(SystemZ::CC)) + return false; + bool Missing = false; + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + for (; MBBI != E; ++MBBI) { + if (MBBI->isDebugInstr()) + continue; + if (MBBI->readsRegister(SystemZ::CC)) { + Missing = true; + break; + } + if (MBBI->definesRegister(SystemZ::CC)) + break; + } + if (Missing || (MBBI == E && SystemZRegisterInfo::isCCLiveOut(MBB))) { + assert(&MBB != &*MBB.getParent()->begin() && + "CC can not be live-in in the entry block of the function."); + LLVM_DEBUG(dbgs() << "SystemZExpandPseudo: Adding missing CC to MBB " + << "live-in list!\n"); + MBB.addLiveIn(SystemZ::CC); + return true; + } + return false; +} + bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getSubtarget().getInstrInfo()); + Subtarget = &MF.getSubtarget(); + TRI = Subtarget->getRegisterInfo(); + + // We must know about all CC users when expanding CRMux, so better make + // sure all live-in CCs are in the live-in lists. + bool Change = true; + while (Change) { + Change = false; + for (auto &MBB : MF) + Change |= updateLiveInLists_CC(MBB); + } bool Modified = false; for (auto &MBB : MF) Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -4574,6 +4574,19 @@ let OpType = "reg"; } +// Like BinaryRRAndK, but expanded after RA depending on the choice of register. +multiclass BinaryRRAndKPseudo { + let NumOpsKey = key, Predicates = [FeatureHighWord] in { + let NumOpsValue = "3" in + def K : Pseudo<(outs cls1:$R1), (ins cls2:$R2, cls3:$R3), []>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1, + Constraints = "$R1 = $R1src" in + def "" : Pseudo<(outs cls1:$R1), (ins cls2:$R1src, cls3:$R3), + [(set cls1:$R1, (operator cls2:$R1src, cls3:$R3))]>; + } +} + // Like BinaryRI, but expanded after RA depending on the choice of register. class BinaryRIPseudo @@ -4621,6 +4634,15 @@ let AccessBytes = bytes; } +// Like CompareRRE, but expanded after RA depending on the choice of +// register. +class CompareRREPseudo + : Pseudo<(outs), (ins cls1:$R1, cls2:$R2), + [(set CC, (operator cls1:$R1, cls2:$R2))]> { + let isCompare = 1; +} + // Like TestBinarySIL, but expanded later. 
class TestBinarySILPseudo : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2), Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -155,8 +155,6 @@ unsigned HighOpcode) const; void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; - void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; @@ -248,7 +246,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, @@ -317,6 +316,42 @@ AliasAnalysis *AA = nullptr) const override; }; +namespace SystemZII { +struct MuxInfo { + MachineInstr *MI; + unsigned PseudoOpcode; + unsigned NumOps; + MachineBasicBlock *MBB; + DebugLoc DL; + unsigned Regs[3]; + unsigned Regs64[3]; + bool Rotated; + bool Commuted; + bool DstRegUsed; + MuxInfo(MachineInstr &mi_, const TargetRegisterInfo *TRI); + bool isHIGH(unsigned Idx) { + assert(Idx < NumOps && "Operand index out of range."); + return SystemZ::GRH32BitRegClass.contains(Regs[Idx]); + } + bool isLOW(unsigned Idx) { return !isHIGH(Idx); } + bool sameReg(unsigned A, unsigned B) { return Regs[A] == Regs[B]; } + bool differentRegs(unsigned A, unsigned B) { return !sameReg(A, B); } + bool sameReg64(unsigned A, unsigned B) { return Regs64[A] == Regs64[B]; } + bool differentRegs64(unsigned A, unsigned B) { return !sameReg64(A, B); } + void useDstRegForOp(unsigned Idx, const SystemZInstrInfo *TII); + void RotateReg0(const TargetRegisterInfo *TRI, + const SystemZInstrInfo *TII); + void negateRHSIfSub(const SystemZInstrInfo *TII); + void commute(const SystemZInstrInfo *TII); + bool selectAddSubMux(const SystemZInstrInfo *TII, + SlotIndexes *SI = nullptr); + bool selectCmpMux(const SystemZInstrInfo *TII); + bool isAdd(const SystemZInstrInfo *TII) const { + return TII->get(PseudoOpcode).isCommutable(); + } +}; +} // end namespace SystemZII + } // end namespace llvm #endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -46,7 +46,9 @@ #include "SystemZGenInstrInfo.inc" #define DEBUG_TYPE "systemz-II" -STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); +STATISTIC(PREGCOPIES, "Number of copies between allocatable physregs"); +STATISTIC(MuxCopyDst, "Number of Mux pseudos needing a copy to dst."); +STATISTIC(FOLDEDMEMOPS, "Number of memops folded."); // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { @@ -198,29 +200,6 @@ MI.setDesc(get(Opcode)); } -// MI is a load-register-on-condition pseudo instruction. Replace it with -// LowOpcode if source and destination are both low GR32s and HighOpcode if -// source and destination are both high GR32s. 
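// Note: expandLOCRPseudo() is removed because LOCRMux is no longer selected in
// expandPostRAPseudo(). The new SystemZSelectMux pass, which runs right after
// the virtual registers are rewritten, now picks LOCR or LOCFHR when the
// destination and source ended up in the same half; only the remaining mixed
// cases are turned into a branch sequence by SystemZExpandPseudo::expandLOCRMux().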
-void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, - unsigned HighOpcode) const { - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(2).getReg(); - bool DestIsHigh = isHighReg(DestReg); - bool SrcIsHigh = isHighReg(SrcReg); - - if (!DestIsHigh && !SrcIsHigh) - MI.setDesc(get(LowOpcode)); - else if (DestIsHigh && SrcIsHigh) - MI.setDesc(get(HighOpcode)); - else - LOCRMuxJumps++; - - // If we were unable to implement the pseudo with a single instruction, we - // need to convert it back into a branch sequence. This cannot be done here - // since the caller of expandPostRAPseudo does not handle changes to the CFG - // correctly. This change is defered to the SystemZExpandPseudo pass. -} - // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. @@ -794,6 +773,8 @@ return; } + PREGCOPIES++; + if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc, false); @@ -1060,8 +1041,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); unsigned Opcode = MI.getOpcode(); @@ -1087,6 +1069,7 @@ .addImm(MI.getOperand(2).getImm()); BuiltMI->findRegisterDefOperand(SystemZ::CC)->setIsDead(true); CCLiveRange.createDeadDef(MISlot, LIS->getVNInfoAllocator()); + FOLDEDMEMOPS++; return BuiltMI; } } @@ -1113,6 +1096,7 @@ .addImm(0) .addImm(MI.getOperand(2).getImm()); transferDeadCC(&MI, BuiltMI); + FOLDEDMEMOPS++; return BuiltMI; } @@ -1128,6 +1112,7 @@ .addImm(0) .addImm((int8_t)MI.getOperand(2).getImm()); transferDeadCC(&MI, BuiltMI); + FOLDEDMEMOPS++; return BuiltMI; } @@ -1143,6 +1128,7 @@ .addImm(0) .addImm((int8_t)-MI.getOperand(2).getImm()); transferDeadCC(&MI, BuiltMI); + FOLDEDMEMOPS++; return BuiltMI; } @@ -1153,6 +1139,7 @@ // source register instead. if (OpNum == 0) { unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; + FOLDEDMEMOPS++; return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(StoreOpcode)) .add(MI.getOperand(1)) @@ -1164,6 +1151,7 @@ // destination register instead. if (OpNum == 1) { unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD; + FOLDEDMEMOPS++; return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpcode)) .add(MI.getOperand(0)) @@ -1191,6 +1179,7 @@ if (MMO->getSize() == Size && !MMO->isVolatile() && !MMO->isAtomic()) { // Handle conversion of loads. if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) { + FOLDEDMEMOPS++; return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(SystemZ::MVC)) .addFrameIndex(FrameIndex) @@ -1202,6 +1191,7 @@ } // Handle conversion of stores. if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) { + FOLDEDMEMOPS++; return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(SystemZ::MVC)) .add(MI.getOperand(1)) @@ -1214,9 +1204,65 @@ } } - // If the spilled operand is the final one, try to change R - // into . 
- int MemOpcode = SystemZ::getMemOpcode(Opcode); + // If the spilled operand is the final one, try to change R into + // . These Muxes may have a suitable memory instruction. It seems to + // work best to decide on a high/low opcode at this point and constrain the + // register class accordingly if needed. + int MemOpcode = -1; + const TargetRegisterClass *Op0RC32 = nullptr; + switch(Opcode) { + case SystemZ::ARMux: + case SystemZ::ALRMux: + case SystemZ::SRMux: + case SystemZ::SLRMux: + case SystemZ::CRMux: + case SystemZ::CLRMux: + Op0RC32 = RI.getRC32(MI.getOperand(0), VRM, &MRI); + break; + default: break; + } + const TargetRegisterClass *ConstrainedRC = nullptr; + if (Op0RC32 != nullptr) { + if (Op0RC32 != &SystemZ::GRH32BitRegClass) { + switch(Opcode) { + case SystemZ::ARMux: + MemOpcode = SystemZ::A; + break; + case SystemZ::ALRMux: + MemOpcode = SystemZ::AL; + break; + case SystemZ::SRMux: + MemOpcode = SystemZ::S; + break; + case SystemZ::SLRMux: + MemOpcode = SystemZ::SL; + break; + case SystemZ::CRMux: + MemOpcode = SystemZ::C; + break; + case SystemZ::CLRMux: + MemOpcode = SystemZ::CL; + break; + default: break; + } + assert(MemOpcode != -1 && "Should have a opcode for low-part by now."); + ConstrainedRC = &SystemZ::GR32BitRegClass; + } else { + switch(Opcode) { + case SystemZ::CRMux: + MemOpcode = SystemZ::CHF; + break; + case SystemZ::CLRMux: + MemOpcode = SystemZ::CLHF; + break; + default: break; + } + if (MemOpcode != -1) + ConstrainedRC = &SystemZ::GRH32BitRegClass; + } + } + if (MemOpcode == -1) + MemOpcode = SystemZ::getMemOpcode(Opcode); if (MemOpcode >= 0) { unsigned NumOps = MI.getNumExplicitOperands(); if (OpNum == NumOps - 1) { @@ -1233,6 +1279,12 @@ if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); transferDeadCC(&MI, MIB); + if (ConstrainedRC != nullptr) { + unsigned Reg = MIB->getOperand(0).getReg(); + if (MRI.getRegClass(Reg) == &SystemZ::GRX32BitRegClass) + MRI.constrainRegClass(Reg, ConstrainedRC); + } + FOLDEDMEMOPS++; return MIB; } } @@ -1301,10 +1353,6 @@ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI); return true; - case SystemZ::LOCRMux: - expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); - return true; - case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; @@ -1385,14 +1433,6 @@ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false); return true; - case SystemZ::CHIMux: - expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false); - return true; - - case SystemZ::CFIMux: - expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false); - return true; - case SystemZ::CLFIMux: expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false); return true; @@ -1815,3 +1855,174 @@ return false; } + +SystemZII::MuxInfo::MuxInfo(MachineInstr &mi_, + const TargetRegisterInfo *TRI) : MI(&mi_) { + PseudoOpcode = mi_.getOpcode(); + NumOps = (MI->isCompare() ? 2 : 3); + MBB = MI->getParent(); + DL = MI->getDebugLoc(); + for (unsigned OpIdx = 0; OpIdx < NumOps; ++OpIdx) { + Regs[OpIdx] = MI->getOperand(OpIdx).getReg(); + assert(SystemZ::GRX32BitRegClass.contains(Regs[OpIdx]) && + "Expected a GRX32 register operand."); + unsigned SubRegIdx = (SystemZ::GR32BitRegClass.contains(Regs[OpIdx]) ? 
+ SystemZ::subreg_l32 : SystemZ::subreg_h32); + Regs64[OpIdx] = TRI->getMatchingSuperReg(Regs[OpIdx], SubRegIdx, + &SystemZ::GR64BitRegClass); + } + Rotated = false; + Commuted = false; + DstRegUsed = false; +} + +void SystemZII::MuxInfo::useDstRegForOp(unsigned Idx, + const SystemZInstrInfo *TII) { + TII->copyPhysReg(*MBB, MI, DL, Regs[0], Regs[Idx], false/*KillSrc*/); + unsigned ReplacedReg = Regs[Idx]; + for (unsigned OpIdx = 1; OpIdx < NumOps; ++OpIdx) + if (Regs[OpIdx] == ReplacedReg) { + Regs[OpIdx] = Regs[0]; + Regs64[OpIdx] = Regs64[0]; + } + MuxCopyDst++; + DstRegUsed = true; +} + +void SystemZII::MuxInfo::RotateReg0(const TargetRegisterInfo *TRI, + const SystemZInstrInfo *TII) { + // Put the second rotation after MI. + MachineBasicBlock::iterator MBBI = MI; + MachineBasicBlock::iterator InsPos = (Rotated ? std::next(MBBI) : MBBI); + BuildMI(*MBB, InsPos, DL, TII->get(SystemZ::RLLG), Regs64[0]) + .addReg(Regs64[0]).addReg(0).addImm(32); + unsigned Reg = Regs[0]; + unsigned OtherSubRegIdx = + isLOW(0) ? SystemZ::subreg_h32 : SystemZ::subreg_l32; + unsigned OtherReg = TRI->getSubReg(Regs64[0], OtherSubRegIdx); + for (unsigned Idx = 0; Idx < NumOps; ++Idx) { + if (Regs[Idx] == Reg) + Regs[Idx] = OtherReg; + else if (Regs[Idx] == OtherReg) + Regs[Idx] = Reg; + } + Rotated = true; +} + +void SystemZII::MuxInfo::negateRHSIfSub(const SystemZInstrInfo *TII) { + switch(PseudoOpcode) { + case SystemZ::SRMux: + case SystemZ::SRMuxK: + PseudoOpcode = SystemZ::ARMux; + break; + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + PseudoOpcode = SystemZ::ALRMux; + break; + default: + return; + } + if (isLOW(2)) { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LNR), Regs[2]) + .addReg(Regs[2]); + } else { + BuildMI(*MBB, MI, DL, TII->get(SystemZ::XIHF), Regs[2]) + .addReg(Regs[2]) + .addImm(0xffffffff); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::AIH), Regs[2]) + .addReg(Regs[2]) + .addImm(1); + } +} + +void SystemZII::MuxInfo::commute(const SystemZInstrInfo *TII) { + negateRHSIfSub(TII); + + unsigned LHS = (NumOps == 3 ? 1 : 0); + unsigned RHS = (NumOps == 3 ? 2 : 1); + unsigned RegTmp = Regs[RHS]; + Regs[RHS] = Regs[LHS]; + Regs[LHS] = RegTmp; + RegTmp = Regs64[RHS]; + Regs64[RHS] = Regs64[LHS]; + Regs64[LHS] = RegTmp; + Commuted = true; +} + +bool SystemZII::MuxInfo::selectAddSubMux(const SystemZInstrInfo *TII, + SlotIndexes *SI) { + unsigned LowOpcode, HiOpcode, HiHiLoOpcode; + if (PseudoOpcode == SystemZ::ARMux || PseudoOpcode == SystemZ::ARMuxK) { + LowOpcode = SystemZ::AR; + HiOpcode = SystemZ::AHHHR; + HiHiLoOpcode = SystemZ::AHHLR; + } + else if (PseudoOpcode == SystemZ::SRMux || PseudoOpcode == SystemZ::SRMuxK) { + LowOpcode = SystemZ::SR; + HiOpcode = SystemZ::SHHHR; + HiHiLoOpcode = SystemZ::SHHLR; + } + else if (PseudoOpcode == SystemZ::ALRMux || PseudoOpcode == SystemZ::ALRMuxK) { + LowOpcode = SystemZ::ALR; + HiOpcode = SystemZ::ALHHHR; + HiHiLoOpcode = SystemZ::ALHHLR; + } + else if (PseudoOpcode == SystemZ::SLRMux || PseudoOpcode == SystemZ::SLRMuxK) { + LowOpcode = SystemZ::SLR; + HiOpcode = SystemZ::SLHHHR; + HiHiLoOpcode = SystemZ::SLHHLR; + } + else + llvm_unreachable("Bad opcode."); + + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1) && isLOW(2)) + Opcode = (Regs[0] == Regs[1] ? 
+ LowOpcode : SystemZ::getThreeOperandOpcode(LowOpcode)); + else if (isHIGH(0) && isHIGH(1) && isHIGH(2)) + Opcode = HiOpcode; + else if (isHIGH(0) && isHIGH(1) && isLOW(2)) + Opcode = HiHiLoOpcode; + else + return false; + MachineInstr *NewMI = BuildMI(*MBB, MI, DL, TII->get(Opcode), Regs[0]) + .addReg(Regs[1]) + .addReg(Regs[2]); + if (SI != nullptr) + SI->replaceMachineInstrInMaps(*MI, *NewMI); + MI->eraseFromParent(); + MI = NewMI; + return true; +} + +bool SystemZII::MuxInfo::selectCmpMux(const SystemZInstrInfo *TII) { + unsigned LowOpcode, HiOpcode, HiLowOpcode; + if (PseudoOpcode == SystemZ::CRMux) { + LowOpcode = SystemZ::CR; + HiOpcode = SystemZ::CHHR; + HiLowOpcode = SystemZ::CHLR; + } else if (PseudoOpcode == SystemZ::CLRMux) { + LowOpcode = SystemZ::CLR; + HiOpcode = SystemZ::CLHHR; + HiLowOpcode = SystemZ::CLHLR; + } else + llvm_unreachable("Bad opcode."); + + unsigned Opcode = 0; + if (isLOW(0) && isLOW(1)) + Opcode = LowOpcode; + else if (isHIGH(0) && isHIGH(1)) + Opcode = HiOpcode; + else if (isHIGH(0) && isLOW(1)) + Opcode = HiLowOpcode; + else + return false; + if (Commuted || Rotated) + for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { + MachineOperand &MO = MI->getOperand(OpIdx); + MO.setReg(Regs[OpIdx]); + MO.setIsKill(false); + } + MI->setDesc(TII->get(Opcode)); + return true; +} Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -890,6 +890,8 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Addition of a register. let isCommutable = 1 in { + // Expands to AR(K), AHHHR or AHHLR, depending on the choice of register. + defm ARMux : BinaryRRAndKPseudo<"armux", z_sadd, GRX32, GRX32, GRX32>; defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>; defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_sadd, GR64, GR64>; } @@ -932,6 +934,8 @@ let Defs = [CC] in { // Addition of a register. let isCommutable = 1 in { + // Expands to ALR(K), ALHHHR or ALHHLR, depending on the choice of register. + defm ALRMux : BinaryRRAndKPseudo<"alrmux", z_uadd, GRX32, GRX32, GRX32>; defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uadd, GR32, GR32>; defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uadd, GR64, GR64>; } @@ -991,6 +995,8 @@ // Subtraction producing a signed overflow flag. let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { // Subtraction of a register. + // Expands to SR(K), SHHHR or SHHLR, depending on the choice of register. + defm SRMux : BinaryRRAndKPseudo<"srmux", z_ssub, GRX32, GRX32, GRX32>; defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssub, GR32, GR32>; def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssub, GR64, GR64>; @@ -1032,6 +1038,8 @@ // Subtraction producing a carry. let Defs = [CC] in { // Subtraction of a register. + // Expands to SLR(K), SLHHHR or SLHHLR, depending on the choice of register. + defm SLRMux : BinaryRRAndKPseudo<"slrmux", z_usub, GRX32, GRX32, GRX32>; defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usub, GR32, GR32>; def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usub, GR64, GR64>; @@ -1417,7 +1425,10 @@ // some of the signed forms have COMPARE AND BRANCH equivalents whereas none // of the unsigned forms do. let Defs = [CC], CCValues = 0xE in { - // Comparison with a register. + // Comparison with a register. 
CRMux expands to CR, CHHR or CHLR, + // depending on the choice of register. + def CRMux : CompareRREPseudo, + Requires<[FeatureHighWord]>; def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>; def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>; @@ -1469,7 +1480,10 @@ // Unsigned comparisons. let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { - // Comparison with a register. + // Comparison with a register. CLRMux expands to CLR, CLHHR or CLHLR, + // depending on the choice of register. + def CLRMux : CompareRREPseudo, + Requires<[FeatureHighWord]>; def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; Index: lib/Target/SystemZ/SystemZRegisterInfo.h =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.h +++ lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,12 +49,19 @@ const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + static bool isCCLiveOut(MachineBasicBlock &MBB); + const TargetRegisterClass* getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) const; bool getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + bool allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap *VRM) const override; // Override TargetRegisterInfo.h. bool requiresRegisterScavenging(const MachineFunction &MF) const override { Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -21,16 +21,18 @@ #define GET_REGINFO_TARGET_DESC #include "SystemZGenRegisterInfo.inc" +#define DEBUG_TYPE "regalloc" + SystemZRegisterInfo::SystemZRegisterInfo() : SystemZGenRegisterInfo(SystemZ::R14D) {} // Given that MO is a GRX32 operand, return either GR32 or GRH32 if MO // somehow belongs in it. Otherwise, return GRX32. 
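// Note: getRC32() is promoted from a static helper to a member of
// SystemZRegisterInfo because it is now needed outside this file as well: the
// hint propagation below and SystemZInstrInfo::foldMemoryOperandImpl() both
// use it to find out whether a GRX32 virtual register has (or will get) a low
// or a high part.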
-static const TargetRegisterClass *getRC32(MachineOperand &MO, - const VirtRegMap *VRM, - const MachineRegisterInfo *MRI) { +const TargetRegisterClass* +SystemZRegisterInfo::getRC32(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI) const { const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); - if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || MO.getSubReg() == SystemZ::subreg_l32 || MO.getSubReg() == SystemZ::subreg_hl32) @@ -49,7 +51,7 @@ return &SystemZ::GRH32BitRegClass; } - assert (RC == &SystemZ::GRX32BitRegClass); + assert(RC == &SystemZ::GRX32BitRegClass); return RC; } @@ -73,6 +75,20 @@ Hints.push_back(Reg); } +// EXPERIMENTAL +#include "llvm/Support/CommandLine.h" +static cl::opt HARDHIORLO("hard-hi-or-lo", cl::init(false)); +static cl::opt SOFTHIORLO("soft-hi-or-lo", cl::init(false)); +static cl::opt HARD_LT_HINTS("hard-lt-hints", cl::init(false)); +static cl::opt HARD_LL_CH_HINTS("hard-ll-ch-hints", cl::init(false)); + +bool SystemZRegisterInfo::isCCLiveOut(MachineBasicBlock &MBB) { + for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + return false; +} + bool SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, @@ -81,66 +97,256 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); if (MRI->getRegClass(VirtReg) == &SystemZ::GRX32BitRegClass) { + // A map from virtual registers to their register classes. + struct Reg2RCMap : std::map { + unsigned VirtReg_; + bool Change; + const TargetRegisterClass *SoftHintRC; + Reg2RCMap(unsigned Reg) : VirtReg_(Reg) { reset(); } + void reset() { + Change = false; + SoftHintRC = nullptr; + } + void findRC(MachineOperand &MO, + const VirtRegMap *VRM, + const MachineRegisterInfo *MRI, + const SystemZRegisterInfo *TRI) { + if (find(MO.getReg()) != end()) + return; + const TargetRegisterClass *RC = TRI->getRC32(MO, VRM, MRI); + (*this)[MO.getReg()] = RC; + } + void constrainReg(unsigned Reg, const TargetRegisterClass *RC, + bool HardHint = true) { + assert((RC == &SystemZ::GR32BitRegClass || + RC == &SystemZ::GRH32BitRegClass) && + "Should constrain to either low or high parts."); + if (!HardHint) { + if (Reg == VirtReg_) + SoftHintRC = RC; + return; + } + if ((*this)[Reg] == &SystemZ::GRX32BitRegClass) { + (*this)[Reg] = RC; + Change = true; + } else if ((*this)[Reg] != nullptr && (*this)[Reg] != RC) { + // Don't hint either way if the opposite RC is also needed. + (*this)[Reg] = nullptr; + Change = true; + } + } + } Reg2RC(VirtReg); + // Search all use/def connected instructions iteratively to propagate the + // requirements of registers being in either GR32 or GR32H. At the end, + // pass hints for VirtReg if either GR32 or GR32H is the result. + const TargetRegisterClass *LOW = &SystemZ::GR32BitRegClass; + const TargetRegisterClass *HIGH = &SystemZ::GRH32BitRegClass; SmallVector Worklist; SmallSet DoneRegs; - Worklist.push_back(VirtReg); - while (Worklist.size()) { - unsigned Reg = Worklist.pop_back_val(); - if (!DoneRegs.insert(Reg).second) - continue; - - for (auto &Use : MRI->use_instructions(Reg)) { - // For LOCRMux, see if the other operand is already a high or low - // register, and in that case give the correpsonding hints for - // VirtReg. 
LOCR instructions need both operands in either high or - // low parts. - if (Use.getOpcode() == SystemZ::LOCRMux) { - MachineOperand &TrueMO = Use.getOperand(1); - MachineOperand &FalseMO = Use.getOperand(2); - const TargetRegisterClass *RC = - TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), - getRC32(TrueMO, VRM, MRI)); - if (RC && RC != &SystemZ::GRX32BitRegClass) { - addHints(Order, Hints, RC, MRI); - // Return true to make these hints the only regs available to - // RA. This may mean extra spilling but since the alternative is - // a jump sequence expansion of the LOCRMux, it is preferred. - return true; - } + Reg2RC.Change = true; + while (Reg2RC.Change) { + Reg2RC.reset(); + Worklist.clear(); + Worklist.push_back(VirtReg); + DoneRegs.clear(); + while (Worklist.size()) { + unsigned Reg = Worklist.pop_back_val(); + if (!DoneRegs.insert(Reg).second) + continue; + for (MachineInstr &MI : MRI->reg_nodbg_instructions(Reg)) { + unsigned Regs[3] = {0, 0, 0}; + bool HasHigh = false; + bool HasLow = false; + unsigned OpI = 0; + auto processOperands = [&]() -> void { + for (; OpI < MI.getNumOperands(); ++OpI) { + MachineOperand &MO = MI.getOperand(OpI); + if (!MO.isReg() || MO.isImplicit() || + MRI->getRegClass(MO.getReg()) == &SystemZ::ADDR64BitRegClass) + break; + assert(OpI < 3 && "Index out of range."); + Regs[OpI] = MO.getReg(); + Reg2RC.findRC(MO, VRM, MRI, this); + // Add GRX32 registers to worklist + if (Reg2RC[MO.getReg()] == &SystemZ::GRX32BitRegClass) + Worklist.push_back(MO.getReg()); + else if (Reg2RC[MO.getReg()] == HIGH) + HasHigh = true; + else + HasLow = true; + } + }; + // Add hints for all operands to make them all high or low if + // possible, to avoid the slower High/Low instructions. If + // OnlyExtra is false, give hard hints. If OnlyExtra is true, give + // hard or soft hints only if one of the experimental CL options + // for this was given. + auto addHiOrLoHints = [&](bool OnlyExtra) -> void { + if (!HARDHIORLO && !SOFTHIORLO && OnlyExtra) + return; + bool HintHard = (OnlyExtra ? HARDHIORLO : true); + const TargetRegisterClass *RC = nullptr; + if (HasHigh && !HasLow) + RC = HIGH; + else if (HasLow && !HasHigh) + RC = LOW; + if (RC != nullptr) + for (unsigned Idx = 0; Regs[Idx] && Idx < 3; ++Idx) + Reg2RC.constrainReg(Regs[Idx], RC, HintHard); + }; + + switch (MI.getOpcode()) { + case SystemZ::CRMux: + case SystemZ::CLRMux: + processOperands(); + // Avoid needing to rotate the LH case. + if (isCCLiveOut(*MI.getParent())) { + // L_ -> LL + if (Reg2RC[Regs[0]] == LOW) + Reg2RC.constrainReg(Regs[1], LOW); + // _H -> HH + else if (Reg2RC[Regs[1]] == HIGH) + Reg2RC.constrainReg(Regs[0], HIGH); + } + addHiOrLoHints(true/*OnlyExtra*/); + break; - // Add the other operand of the LOCRMux to the worklist. - unsigned OtherReg = - (TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg()); - if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass) - Worklist.push_back(OtherReg); - } // end LOCRMux - else if (Use.getOpcode() == SystemZ::CHIMux || - Use.getOpcode() == SystemZ::CFIMux) { - if (Use.getOperand(1).getImm() == 0) { - bool OnlyLMuxes = true; - for (MachineInstr &DefMI : MRI->def_instructions(VirtReg)) - if (DefMI.getOpcode() != SystemZ::LMux) - OnlyLMuxes = false; - if (OnlyLMuxes) { - addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI); - // Return false to make these hints preferred but not obligatory. 
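// Note: the old CHIMux/CFIMux special case is not dropped; it is carried over
// into the new opcode switch. A compare against zero of a value defined only
// by LMux still prefers a low register, which keeps the option open for
// SystemZElimCompare to fold the load and compare into a load-and-test, but
// by default this is now only a soft hint; the experimental -hard-lt-hints
// option makes it a hard one.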
- return false; + case SystemZ::SRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMux: + case SystemZ::SLRMuxK: + processOperands(); + // H__ -> HH_ + if (Reg2RC[Regs[0]] == HIGH) { + Reg2RC.constrainReg(Regs[1], HIGH); + break; + } + // _L_ -> LLL + if (Reg2RC[Regs[1]] == LOW) { + Reg2RC.constrainReg(Regs[0], LOW); + Reg2RC.constrainReg(Regs[2], LOW); + break; + } + LLVM_FALLTHROUGH; + case SystemZ::ARMux: + case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + processOperands(); + // L__ -> LLL + if (Reg2RC[Regs[0]] == LOW) { + Reg2RC.constrainReg(Regs[1], LOW); + Reg2RC.constrainReg(Regs[2], LOW); } + // _LL -> LLL + else if (Reg2RC[Regs[1]] == LOW && Reg2RC[Regs[2]] == LOW) + Reg2RC.constrainReg(Regs[0], LOW); + else if (Reg2RC[Regs[0]] == HIGH) { + // HL_ -> HLH + if (Reg2RC[Regs[1]] == LOW) + Reg2RC.constrainReg(Regs[2], HIGH); + // H_L -> HHL + else if (Reg2RC[Regs[2]] == LOW) + Reg2RC.constrainReg(Regs[1], HIGH); + // Hrr -> HHH + else if (Regs[1] == Regs[2]) + Reg2RC.constrainReg(Regs[1], HIGH); + } + // _H? -> HH? + else if (Reg2RC[Regs[1]] == HIGH || Reg2RC[Regs[2]] == HIGH) + Reg2RC.constrainReg(Regs[0], HIGH); + addHiOrLoHints(true/*OnlyExtra*/); + break; + + case SystemZ::LOCRMux: + processOperands(); + addHiOrLoHints(false/*OnlyExtra*/); + break; + + case SystemZ::LLCMux: + case SystemZ::LLHMux: + processOperands(); + Reg2RC.constrainReg(Regs[0], LOW, HARD_LL_CH_HINTS/*HardHint*/); + break; + + case SystemZ::CHIMux: + case SystemZ::CFIMux: + processOperands(); + if (MI.getOperand(1).getImm() == 0) { + bool OnlyLMuxes = true; + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) + if (DefMI.getOpcode() != SystemZ::LMux) + OnlyLMuxes = false; + if (OnlyLMuxes) + Reg2RC.constrainReg(Regs[0], LOW, HARD_LT_HINTS/*HardHint*/); + } + break; + + default: break; } - } // end CHIMux / CFIMux - } + } // MI + } // Worklist + } // Change + const TargetRegisterClass *ResultRC = Reg2RC[VirtReg]; + if (ResultRC == nullptr) + return BaseImplRetVal; + if (ResultRC != &SystemZ::GRX32BitRegClass) { + assert ((ResultRC == LOW || ResultRC == HIGH) && "Bad Regclass."); + addHints(Order, Hints, ResultRC, MRI); + return true; + } + if (Reg2RC.SoftHintRC != nullptr) { + assert ((Reg2RC.SoftHintRC == LOW || Reg2RC.SoftHintRC == HIGH) && + "Bad Regclass."); + addHints(Order, Hints, Reg2RC.SoftHintRC, MRI); + return false; } } return BaseImplRetVal; } +bool SystemZRegisterInfo::allowHintRecoloring(unsigned VReg, + unsigned NewPhysReg, + const VirtRegMap *VRM) const { + if (!SystemZ::GRX32BitRegClass.contains(NewPhysReg)) + return true; + unsigned CurrPhys = VRM->getPhys(VReg); + assert(SystemZ::GRX32BitRegClass.contains(CurrPhys) && "Expected GRX32 reg."); + if (SystemZ::GR32BitRegClass.contains(CurrPhys) == + SystemZ::GR32BitRegClass.contains(NewPhysReg)) + return true; + + // This is a GRX32 register that has changed between low and high parts. 
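// Reject the recoloring if the virtual register is used by any of the GRX32
// Mux pseudos listed below: the allocation hints were computed so that the
// currently assigned half gives a legal operand combination, and moving the
// value to the other half behind the hinting logic's back could recreate an
// unsupported form (for example a (LOW, HIGH) compare).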
+ MachineRegisterInfo *MRI = &VRM->getRegInfo(); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(VReg)) + switch (MI.getOpcode()) { + case SystemZ::LOCRMux: + case SystemZ::CRMux: + case SystemZ::CLRMux: + case SystemZ::SRMuxK: + case SystemZ::SLRMuxK: + case SystemZ::SRMux: + case SystemZ::SLRMux: + case SystemZ::ARMux: + case SystemZ::ARMuxK: + case SystemZ::ALRMux: + case SystemZ::ALRMuxK: + LLVM_DEBUG(dbgs() << "SystemZ: Avoiding replacing " + << getName(CurrPhys) << " with " + << getName(NewPhysReg) << " in "; MI.dump()); + return false; + default: break; + } + + return true; +} + const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const SystemZSubtarget &Subtarget = MF->getSubtarget(); Index: lib/Target/SystemZ/SystemZScheduleZ13.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ13.td +++ lib/Target/SystemZ/SystemZScheduleZ13.td @@ -366,6 +366,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -397,6 +398,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -547,6 +549,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZScheduleZ14.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ14.td +++ lib/Target/SystemZ/SystemZScheduleZ14.td @@ -367,6 +367,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; @@ -398,6 +399,7 @@ def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], @@ -557,6 +559,7 @@ def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: 
lib/Target/SystemZ/SystemZScheduleZ196.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZ196.td +++ lib/Target/SystemZ/SystemZScheduleZ196.td @@ -326,6 +326,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -359,6 +360,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU2, GroupAlone], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -508,6 +510,7 @@ def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU2, LSU, GroupAlone], Index: lib/Target/SystemZ/SystemZScheduleZEC12.td =================================================================== --- lib/Target/SystemZ/SystemZScheduleZEC12.td +++ lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -337,6 +337,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "AR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "A(L)?RMux(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "ALSIH(N)?$")>; def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "A(L)?G$")>; @@ -370,6 +371,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "SR(K)?$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?HHHR$")>; def : InstRW<[WLat2, WLat2, FXU, NormalGr], (instregex "S(L)?HHLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "S(L)?RMux(K)?$")>; // Subtraction with borrow def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXU, LSU, GroupAlone], @@ -519,6 +521,7 @@ def : InstRW<[WLat1LSU, FXU, LSU, NormalGr], (instregex "CLRL$")>; def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?HHR$")>; def : InstRW<[WLat2, FXU, NormalGr], (instregex "C(L)?HLR$")>; +def : InstRW<[WLat1, FXU, NormalGr], (instregex "C(L)?RMux")>; // Compare halfword def : InstRW<[WLat2LSU, RegReadAdv, FXU, LSU, NormalGr], (instregex "CH(Y)?$")>; Index: lib/Target/SystemZ/SystemZSelectMux.cpp =================================================================== --- /dev/null +++ lib/Target/SystemZ/SystemZSelectMux.cpp @@ -0,0 +1,209 @@ +//==------- SystemZSelectMux.cpp - Select Mux instructions --------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that is run immediately after VirtRegRewriter +// and before MachineCopyPropagation. 
+// The purpose is to set the opcodes of GRX32 Mux pseudos to match the
+// choice of registers that have been carefully hinted during register
+// allocation. It is important to do this before any later pass might
+// substitute one register for another without knowledge of the actual
+// legal combinations of high/low register assignments.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveStacks.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+using namespace llvm;
+
+#define SYSTEMZ_SELECT_MUX_NAME "SystemZ Mux pseudo selection pass"
+
+#define DEBUG_TYPE "systemz-selectmux"
+STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
+STATISTIC(LOCRs, "Number of LOCRs");
+STATISTIC(MUXLOWs, "Number of Muxes using only low parts");
+STATISTIC(MUXHIGHs, "Number of Muxes using only high parts");
+STATISTIC(MUXHILOs, "Number of Muxes using high and low parts");
+STATISTIC(BADMUXs, "Number of Muxes ending up in illegal registers");
+
+namespace llvm {
+  void initializeSystemZSelectMuxPass(PassRegistry&);
+}
+
+namespace {
+class SystemZSelectMux : public MachineFunctionPass {
+public:
+  static char ID;
+  SystemZSelectMux() : MachineFunctionPass(ID) {
+    initializeSystemZSelectMuxPass(*PassRegistry::getPassRegistry());
+  }
+
+  const SystemZInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  SlotIndexes *SI;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return SYSTEMZ_SELECT_MUX_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addUsedIfAvailable<SlotIndexes>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveStacks>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  bool selectMBB(MachineBasicBlock &MBB);
+  bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+};
+char SystemZSelectMux::ID = 0;
+}
+
+INITIALIZE_PASS(SystemZSelectMux, "systemz-select-mux",
+                SYSTEMZ_SELECT_MUX_NAME, false, false)
+
+/// Returns an instance of the Select Mux pass.
+FunctionPass *llvm::createSystemZSelectMuxPass(SystemZTargetMachine &TM) {
+  return new SystemZSelectMux();
+}
+
+/// If MBBI references a pseudo instruction that should be selected here,
+/// do it and return true. Otherwise return false.
+bool SystemZSelectMux::selectMI(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
+  case SystemZ::LOCRMux: {
+    SystemZII::MuxInfo MuxI(MI, TRI);
+    if (MuxI.isLOW(0) && MuxI.isLOW(2))
+      MI.setDesc(TII->get(SystemZ::LOCR));
+    else if (MuxI.isHIGH(0) && MuxI.isHIGH(2))
+      MI.setDesc(TII->get(SystemZ::LOCFHR));
+    else {
+      // The pseudo cannot be implemented with a single instruction and must
+      // be converted back into a branch sequence. That requires changing
+      // the CFG, which is not done this early; the rewrite is deferred to
+      // the SystemZExpandPseudo pass.
+      LOCRMuxJumps++;
+      return false;
+    }
+    LOCRs++;
+    return true;
+  }
+
+  case SystemZ::CHIMux: {
+    bool IsHigh =
+        SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg());
+    MI.setDesc(TII->get(IsHigh ? SystemZ::CIH : SystemZ::CHI));
+    return true;
+  }
+
+  case SystemZ::CFIMux: {
+    bool IsHigh =
+        SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg());
+    MI.setDesc(TII->get(IsHigh ? SystemZ::CIH : SystemZ::CFI));
+    return true;
+  }
+
+  case SystemZ::CRMux:
+  case SystemZ::CLRMux: {
+    // MI is a 32-bit compare-with-register pseudo instruction. Replace it
+    // with one of the three possible opcodes. "LowHi" is not supported and
+    // must therefore be swapped (in SystemZExpandPseudo.cpp).
+    bool Legal = true;
+    SystemZII::MuxInfo MuxI(MI, TRI);
+    if (MuxI.isLOW(0) && MuxI.isLOW(1))
+      MUXLOWs++;
+    else if (MuxI.isHIGH(0) && MuxI.isHIGH(1))
+      MUXHIGHs++;
+    else if (MuxI.isHIGH(0) && MuxI.isLOW(1))
+      MUXHILOs++;
+    else { // LowHi
+      BADMUXs++;
+      Legal = false;
+    }
+    bool Success = MuxI.selectCmpMux(TII);
+    assert(Legal == Success && "MuxInfo broken?");
+    return Success;
+  }
+
+  case SystemZ::ARMux:
+  case SystemZ::ARMuxK:
+  case SystemZ::ALRMux:
+  case SystemZ::ALRMuxK:
+  case SystemZ::SRMux:
+  case SystemZ::SRMuxK:
+  case SystemZ::SLRMux:
+  case SystemZ::SLRMuxK: {
+    unsigned OldBadMuxStat = BADMUXs;
+    SystemZII::MuxInfo MuxI(MI, TRI);
+    if (MuxI.isLOW(0) && MuxI.isLOW(1) && MuxI.isLOW(2))
+      MUXLOWs++;
+    else if (MuxI.isHIGH(0) && MuxI.isHIGH(1) && MuxI.isHIGH(2))
+      MUXHIGHs++;
+    else if (MuxI.isHIGH(0) && (MuxI.isHIGH(1) || MuxI.isHIGH(2))) {
+      if (MuxI.isHIGH(2)) { // HLH is unsupported.
+        if (MuxI.isAdd(TII)) {
+          MuxI.commute(TII);
+          MUXHILOs++;
+        } else
+          BADMUXs++;
+      } else
+        MUXHILOs++;
+    } else
+      BADMUXs++;
+
+    bool Success = MuxI.selectAddSubMux(TII, SI);
+    assert(Success == (OldBadMuxStat == BADMUXs) && "MuxInfo broken?");
+    return Success;
+  }
+
+  default:
+    break;
+  }
+
+  return false;
+}
+
+/// Iterate over the instructions in basic block MBB and select any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZSelectMux::selectMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= selectMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool SystemZSelectMux::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const SystemZInstrInfo *>(
+      MF.getSubtarget().getInstrInfo());
+  SI = getAnalysisIfAvailable<SlotIndexes>();
+  TRI = MF.getSubtarget().getRegisterInfo();
+
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= selectMBB(MBB);
+  return Modified;
+}
+
Index: lib/Target/SystemZ/SystemZTargetMachine.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -182,6 +182,7 @@
   void addIRPasses() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
+  void addPostRewrite() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
 };
@@ -211,7 +212,15 @@
   return true;
 }
 
+void SystemZPassConfig::addPostRewrite() {
+  addPass(createSystemZSelectMuxPass(getSystemZTargetMachine()));
+}
+
 void SystemZPassConfig::addPreSched2() {
+  // The Mux selection pass is added in addPostRewrite() when RAGreedy is
+  // run (at -O1 and above).
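+  // At -O0 addPostRewrite() is not called, so the Mux pseudos are selected
+  // here instead, immediately before SystemZExpandPseudo rewrites whatever
+  // could not be implemented with a single instruction.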
+ if (getOptLevel() == CodeGenOpt::None) + addPass(createSystemZSelectMuxPass(getSystemZTargetMachine())); + addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); if (getOptLevel() != CodeGenOpt::None) Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -387,7 +387,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -4989,7 +4989,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, LiveIntervals *LIS) const { + int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { // Check switch flag if (NoFusing) return nullptr; Index: test/CodeGen/SystemZ/debuginstr-00.mir =================================================================== --- test/CodeGen/SystemZ/debuginstr-00.mir +++ test/CodeGen/SystemZ/debuginstr-00.mir @@ -63,7 +63,7 @@ bb.1.bb2: liveins: $r0l - CHIMux renamable $r0l, 0, implicit-def $cc + CHI renamable $r0l, 0, implicit-def $cc BRC 14, 6, %bb.1, implicit killed $cc bb.2.bb3.preheader: Index: test/CodeGen/SystemZ/expand-mux-pseudos.mir =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/expand-mux-pseudos.mir @@ -0,0 +1,305 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -start-after=postrapseudos \ +# RUN: -verify-machineinstrs -enable-post-misched=false %s -o - | FileCheck %s +# +# Test that SystemZExpandPseudo can handle all combinations of high/low +# registers and transform all Add / Sub Mux instructions into legal target +# instructions. 
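+#
+# The three-letter tags below (LLL, LLH, LHL, ..., HHH) give the low/high
+# bank assigned to the destination and the two source operands, in that
+# order. Combinations that have no direct target instruction are expected
+# to be expanded into sequences using RLLG / RISBLG / RISBHG to move the
+# value into the required half of the 64-bit register first.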
+ +name: fun0 +body: | + bb.0: + successors: %bb.1 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun0 + ; CHECK: llihl %r2, 0 + + ;; ARMuxK + + ; LLL + renamable $r2l = ARMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ark %r2, %r1, %r0 + renamable $r2l = ARMuxK renamable $r2l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: ar %r2, %r0 + + ; LLH + renamable $r2l = ARMuxK renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ark %r2, %r1, %r2 + renamable $r2l = ARMuxK renamable $r2l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r1 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHL + renamable $r2l = ARMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ar %r2, %r1 + renamable $r2l = ARMuxK renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHH + renamable $r2l = ARMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhhr %r2, %r1, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: risbhg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r0, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = ARMuxK renamable $r2h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: ar %r2, %r2 + + ; HLL + renamable $r2h = ARMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r0 + renamable $r2h = ARMuxK renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: ahhhr %r2, %r2, %r2 + + ; HLH + renamable $r2h = ARMuxK renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r0, %r1 + renamable $r2h = ARMuxK renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r2, %r1 + renamable $r2h = ARMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + + ; HHL + renamable $r2h = ARMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: ahhlr %r2, %r1, %r1 + ; HHH + renamable $r2h = ARMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: ahhhr %r2, %r1, %r0 + + ;; SRMuxK + + ; LLL + renamable $r2l = SRMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: srk %r2, %r1, %r0 + renamable $r2l = SRMuxK renamable $r2l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: sr %r2, %r0 + + ; LLH + renamable $r2l = SRMuxK renamable $r1l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: 
risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: srk %r2, %r1, %r2 + renamable $r2l = SRMuxK renamable $r2l, renamable $r1h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r1 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHL + renamable $r2l = SRMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: sr %r2, %r1 + renamable $r2l = SRMuxK renamable $r1h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r2l, implicit-def dead $cc + ; CHECK-NEXT: lnr %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + + ; LHH + renamable $r2l = SRMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhhr %r2, %r1, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: risbhg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r0 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r0h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: shhlr %r2, %r0, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2l = SRMuxK renamable $r2h, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: risblg %r2, %r2, 0, 159, 32 + ; CHECK-NEXT: sr %r2, %r2 + + ; HLL + renamable $r2h = SRMuxK renamable $r1l, renamable $r0l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhlr %r2, %r2, %r0 + renamable $r2h = SRMuxK renamable $r1l, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r2 + + ; HLH + renamable $r2h = SRMuxK renamable $r1l, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: risbhg %r2, %r1, 0, 159, 32 + ; CHECK-NEXT: shhhr %r2, %r2, %r0 + renamable $r2h = SRMuxK renamable $r1l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: rllg %r2, %r2, 32 + ; CHECK-NEXT: srk %r2, %r1, %r2 + ; CHECK-NEXT: rllg %r2, %r2, 32 + renamable $r2h = SRMuxK renamable $r2l, renamable $r2h, implicit-def dead $cc + ; CHECK-NEXT: xihf %r2, 4294967295 + ; CHECK-NEXT: aih %r2, 1 + ; CHECK-NEXT: ahhlr %r2, %r2, %r2 + + ; HHL + renamable $r2h = SRMuxK renamable $r1h, renamable $r1l, implicit-def dead $cc + ; CHECK-NEXT: shhlr %r2, %r1, %r1 + + ; HHH + renamable $r2h = SRMuxK renamable $r1h, renamable $r0h, implicit-def dead $cc + ; CHECK-NEXT: shhhr %r2, %r1, %r0 + + bb.1: + successors: + +... + +# Test comparisons. 
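+#
+# A CRMux whose operands both ended up in low parts becomes a plain CR
+# (fused with the following branch into a CRJ below), (high, high) becomes
+# CHHR and (high, low) becomes CHLR. There is no (low, high) compare, so
+# that case is expected to be handled either by swapping the operands and
+# branching on the inverted condition, or, failing that, by rotating the
+# low operand into the high half around the compare.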
+ +--- +name: fun1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + $r0l = LHI 0 + $r0h = IIHF 0 + $r1l = LHI 0 + $r1h = IIHF 0 + $r2l = LHI 0 + $r2h = IIHF 0 + + ; CHECK-LABEL: fun1 + ; CHECK: iihf %r2, 0 + + CRMux renamable $r1l, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.2, implicit killed $cc + ; CHECK-NEXT: crjl %r1, %r2, .LBB1_2 + + bb.1: + successors: + $r0l = LHI 0 + + bb.2: + successors: %bb.3, %bb.4 + liveins: $r1h, $r2l + + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + BRC 14, 4, %bb.4, implicit killed $cc + ; CHECK-LABEL: .LBB1_2: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_4 + + bb.3: + successors: + $r0l = LHI 0 + + bb.4: + successors: %bb.5, %bb.6 + liveins: $r1h, $r2h + + CRMux renamable $r1h, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.6, implicit killed $cc + ; CHECK-LABEL: .LBB1_4: + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: jl .LBB1_6 + + bb.5: + successors: + $r0l = LHI 0 + + bb.6: + successors: %bb.7, %bb.8 + liveins: $r1l, $r2h + + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + BRC 14, 4, %bb.8, implicit killed $cc + ; CHECK-LABEL: .LBB1_6: + ; CHECK-NEXT: chlr %r2, %r1 + ; CHECK-NEXT: jh .LBB1_8 + + bb.7: + successors: + $r0l = LHI 0 + + bb.8: + liveins: $r1l, $r2h + + CRMux renamable $r1l, renamable $r2h, implicit-def $cc + ; CHECK-LABEL: .LBB1_8: + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: chhr %r1, %r2 + ; CHECK-NEXT: rllg %r1, %r1, 32 + ; CHECK-NEXT: jl .LBB1_10 + + bb.9: + successors: %bb.10, %bb.11 + liveins: $cc + BRC 14, 4, %bb.11, implicit killed $cc + + bb.10: + successors: + $r0l = LHI 0 + + bb.11: + successors: %bb.12, %bb.13 + liveins: $r1l, $r1h, $r2l + + CRMux renamable $r1h, renamable $r2l, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.13, implicit killed $cc + ; CHECK-LABEL: .LBB1_10: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jl .LBB1_12 + + bb.12: + successors: + $r0l = LHI 0 + + bb.13: + successors: %bb.14, %bb.15 + liveins: $r1l, $r1h, $r2l + + CRMux renamable $r2l, renamable $r1h, implicit-def $cc + renamable $r1l = LOCHI killed renamable $r1l, 0, 14, 8, implicit $cc + BRC 14, 4, %bb.15, implicit killed $cc + ; CHECK-LABEL: .LBB1_12: + ; CHECK-NEXT: chlr %r1, %r2 + ; CHECK-NEXT: lochie %r1, 0 + ; CHECK-NEXT: jh .LBB1_14 + + bb.14: + successors: + $r0l = LHI 0 + + bb.15: + +...
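For reviewers skimming the patch, the following is a minimal standalone sketch of the core selection idea, not part of the patch itself: once VirtRegRewriter has assigned physical registers, the high/low bank of every operand is known, so a Mux pseudo can be rewritten in place by testing GRH32BitRegClass membership. It deliberately bypasses the SystemZII::MuxInfo helper used by the patch, and the function name selectCHIMuxInBlock is hypothetical.

// Illustrative sketch only; not part of the patch.
#include "SystemZInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Hypothetical helper: rewrite every CHIMux in MBB into CHI or CIH,
// depending on whether its register operand was assigned to a low or a
// high 32-bit part of a 64-bit GPR.
static bool selectCHIMuxInBlock(MachineBasicBlock &MBB,
                                const SystemZInstrInfo *TII) {
  bool Modified = false;
  for (MachineInstr &MI : MBB) {
    if (MI.getOpcode() != SystemZ::CHIMux)
      continue;
    // After VirtRegRewriter the operand is a physical register, so the
    // bank it ended up in is known here.
    bool IsHigh =
        SystemZ::GRH32BitRegClass.contains(MI.getOperand(0).getReg());
    MI.setDesc(TII->get(IsHigh ? SystemZ::CIH : SystemZ::CHI));
    Modified = true;
  }
  return Modified;
}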