diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -275,6 +275,11 @@
     return isLoadFromStackSlot(MI, FrameIndex);
   }
 
+  virtual const MachineOperand *isLoadFromStackSlotMO(const MachineInstr &MI,
+                                                      int &FrameIndex) const {
+    llvm_unreachable("Target didn't implement isLoadFromStackSlotMO!");
+  }
+
   /// Check for post-frame ptr elimination stack locations as well.
   /// This uses a heuristic so it isn't reliable for correctness.
   virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
@@ -302,6 +307,11 @@
     return 0;
   }
 
+  virtual const MachineOperand *isStoreToStackSlotMO(const MachineInstr &MI,
+                                                     int &FrameIndex) const {
+    llvm_unreachable("Target didn't implement isStoreToStackSlotMO!");
+  }
+
   /// Optional extension of isStoreToStackSlot that returns the number of
   /// bytes stored to the stack. This must be implemented if a backend
   /// supports partial stack slot spills/loads to further disambiguate
@@ -1996,6 +2006,17 @@
     return MI.getOperand(0);
   }
 
+  /// \Returns true if a spill/reload of size \p MemBits can be handled by
+  /// Spill2Reg.
+  virtual bool isLegalToSpill2Reg(unsigned MemBits, unsigned Opcode) const {
+    llvm_unreachable(
+        "Target didn't implement TargetInstrInfo::isLegalToSpill2Reg!");
+  }
+
+  virtual bool targetSupportsSpill2Reg(const TargetSubtargetInfo *STI) const {
+    return false;
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/Spill2Reg.cpp b/llvm/lib/CodeGen/Spill2Reg.cpp
--- a/llvm/lib/CodeGen/Spill2Reg.cpp
+++ b/llvm/lib/CodeGen/Spill2Reg.cpp
@@ -15,10 +15,15 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
@@ -33,6 +38,61 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   void releaseMemory() override;
   bool runOnMachineFunction(MachineFunction &) override;
+
+private:
+  /// Holds data for spills and reloads.
+  struct StackSlotDataEntry {
+    /// This is set to true to disable code generation for the spills/reloads
+    /// that we collected in this entry.
+    bool Disable = false;
+    /// Indentation for the dump() methods.
+    static constexpr int DumpInd = 2;
+
+    /// The data held for each spill/reload.
+    struct MIData {
+      MIData(MachineInstr *MI, const MachineOperand *MO, unsigned SpillBits)
+          : MI(MI), MO(MO), SpillBits(SpillBits) {}
+      /// The Spill/Reload instruction.
+      MachineInstr *MI = nullptr;
+      /// The operand being spilled/reloaded.
+      const MachineOperand *MO = nullptr;
+      /// The size of the data spilled/reloaded in bits. This occasionally
+      /// differs across accesses to the same stack slot.
+      unsigned SpillBits = 0;
+#ifndef NDEBUG
+      LLVM_DUMP_METHOD void dump() const;
+#endif
+    };
+    SmallVector<MIData> Spills;
+    SmallVector<MIData> Reloads;
+
+    /// \Returns the register class of the register spilled/reloaded.
+    const TargetRegisterClass *getRegClass(MachineRegisterInfo *MRI) const {
+      Register SpillReg = Spills.front().MO->getReg();
+      return MRI->getRegClass(SpillReg);
+    }
+#ifndef NDEBUG
+    LLVM_DUMP_METHOD void dump() const;
+#endif
+  };
+  /// Look for candidates for spill2reg. These candidates are in places with
+  /// high memory unit contention. Fills in StackSlotData.
+  void collectSpillsAndReloads();
+  /// Replace spills to stack with spills to registers (same for reloads).
+  void generateCode();
+  /// Clean up data structures once the pass is finished.
+  void cleanup();
+  /// The main entry point for this pass.
+  bool run();
+
+  /// Map from a stack slot to the corresponding spills and reloads.
+  DenseMap<int, StackSlotDataEntry> StackSlotData;
+
+  MachineFunction *MF = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  MachineFrameInfo *MFI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
 };
 
 } // namespace
@@ -45,12 +105,112 @@
 void Spill2Reg::releaseMemory() {}
 
 bool Spill2Reg::runOnMachineFunction(MachineFunction &MFn) {
-  llvm_unreachable("Unimplemented");
+  // Disable if NoImplicitFloat to avoid emitting instrs that use vectors.
+  if (MFn.getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
+    return false;
+
+  MF = &MFn;
+  MRI = &MF->getRegInfo();
+  MFI = &MF->getFrameInfo();
+  TII = MF->getSubtarget().getInstrInfo();
+  TRI = MF->getSubtarget().getRegisterInfo();
+  // Enable only if the target supports the appropriate vector instruction set.
+  if (!TII->targetSupportsSpill2Reg(&MF->getSubtarget()))
+    return false;
+
+  return run();
 }
 
 char Spill2Reg::ID = 0;
 
 char &llvm::Spill2RegID = Spill2Reg::ID;
 
+void Spill2Reg::collectSpillsAndReloads() {
+  // The checks for collecting spills and reloads are identical, so we keep
+  // them here in one place. Returns true if we should not collect this entry.
+  auto SkipEntry = [this](int StackSlot, unsigned SpillBits,
+                          unsigned Opcode) -> bool {
+    // If not a spill/reload stack slot.
+    if (!MFI->isSpillSlotObjectIndex(StackSlot))
+      return true;
+    // Check size in bits.
+    if (!TII->isLegalToSpill2Reg(SpillBits, Opcode))
+      return true;
+    return false;
+  };
+
+  // Collect spills and reloads and associate them to stack slots.
+  // If any spill/reload for a stack slot is found not to be eligible for
+  // spill-to-reg, then that stack slot is disabled.
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      int StackSlot;
+      if (const MachineOperand *MO = TII->isStoreToStackSlotMO(MI, StackSlot)) {
+        MachineInstr *Spill = &MI;
+        auto &Entry = StackSlotData[StackSlot];
+        unsigned SpillBits = TRI->getRegSizeInBits(MO->getReg(), *MRI);
+        if (SkipEntry(StackSlot, SpillBits, MI.getOpcode())) {
+          Entry.Disable = true;
+          continue;
+        }
+        Entry.Spills.emplace_back(Spill, MO, SpillBits);
+      } else if (const MachineOperand *MO =
+                     TII->isLoadFromStackSlotMO(MI, StackSlot)) {
+        MachineInstr *Reload = &MI;
+        auto &Entry = StackSlotData[StackSlot];
+        unsigned SpillBits = TRI->getRegSizeInBits(MO->getReg(), *MRI);
+        if (SkipEntry(StackSlot, SpillBits, MI.getOpcode())) {
+          Entry.Disable = true;
+          continue;
+        }
+        assert(Reload->getRestoreSize(TII) && "Expected reload");
+        Entry.Reloads.emplace_back(Reload, MO, SpillBits);
+      } else {
+        // This should capture uses of the stack in instructions that access
+        // memory (e.g., folded spills/reloads) and non-memory instructions,
+        // like x86 LEA.
+        for (const MachineOperand &MO : MI.operands())
+          if (MO.isFI()) {
+            int StackSlot = MO.getIndex();
+            auto &Entry = StackSlotData[StackSlot];
+            Entry.Disable = true;
+          }
+      }
+    }
+  }
+}
+
+void Spill2Reg::generateCode() { llvm_unreachable("Unimplemented"); }
+
+void Spill2Reg::cleanup() { StackSlotData.clear(); }
+
+bool Spill2Reg::run() {
+  // Walk over each instruction in the code, keeping track of the processor's
+  // port pressure and looking for memory unit hot-spots.
+  collectSpillsAndReloads();
+
+  // Replace each stack-slot spill/reload with a register spill/reload.
+  generateCode();
+
+  cleanup();
+  return true;
+}
+
+#ifndef NDEBUG
+void Spill2Reg::StackSlotDataEntry::MIData::dump() const {
+  dbgs() << " (" << *MO << ") " << *MI;
+}
+
+void Spill2Reg::StackSlotDataEntry::dump() const {
+  dbgs().indent(DumpInd) << "Disable: " << Disable << "\n";
+  dbgs().indent(DumpInd) << "Spills:\n";
+  for (const MIData &Data : Spills)
+    Data.dump();
+  dbgs().indent(DumpInd) << "Reloads:\n";
+  for (const MIData &Data : Reloads)
+    Data.dump();
+}
+#endif
+
 INITIALIZE_PASS_BEGIN(Spill2Reg, "spill2reg", "Spill2Reg", false, false)
 INITIALIZE_PASS_END(Spill2Reg, "spill2reg", "Spill2Reg", false, false)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -212,6 +212,14 @@
 
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
+
+  const MachineOperand *isLoadFromStackSlotMO(const MachineInstr &MI,
+                                              int &FrameIndex,
+                                              unsigned &MemBytes) const;
+
+  const MachineOperand *isLoadFromStackSlotMO(const MachineInstr &MI,
+                                              int &FrameIndex) const override;
+
   unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex,
                                unsigned &MemBytes) const override;
 
@@ -223,6 +231,14 @@
 
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
+
+  const MachineOperand *isStoreToStackSlotMO(const MachineInstr &MI,
+                                             int &FrameIndex,
+                                             unsigned &MemBytes) const;
+
+  const MachineOperand *isStoreToStackSlotMO(const MachineInstr &MI,
+                                             int &FrameIndex) const override;
+
   unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
                               unsigned &MemBytes) const override;
 
@@ -645,6 +661,10 @@
                            Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
                            const MachineInstr &OI, bool *IsSwapped,
                            int64_t *ImmDelta) const;
+
+  bool isLegalToSpill2Reg(unsigned MemBits, unsigned Opcode) const override;
+
+  bool targetSupportsSpill2Reg(const TargetSubtargetInfo *STI) const override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -887,13 +887,27 @@
   return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
 }
 
+const MachineOperand *
+X86InstrInfo::isLoadFromStackSlotMO(const MachineInstr &MI, int &FrameIndex,
+                                    unsigned &MemBytes) const {
+  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
+    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
+      return &MI.getOperand(0);
+  return nullptr;
+}
+
+const MachineOperand *
+X86InstrInfo::isLoadFromStackSlotMO(const MachineInstr &MI,
+                                    int &FrameIndex) const {
+  unsigned UnusedMemBytes;
+  return isLoadFromStackSlotMO(MI, FrameIndex, UnusedMemBytes);
+}
+
 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
-  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
-    if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
-      return MI.getOperand(0).getReg();
-  return 0;
+  const MachineOperand *MO = isLoadFromStackSlotMO(MI, FrameIndex, MemBytes);
+  return MO != nullptr ? (unsigned)MO->getReg() : 0;
 }
 
 unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
@@ -921,14 +935,28 @@
   return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
 }
 
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
-                                          int &FrameIndex,
-                                          unsigned &MemBytes) const {
+const MachineOperand *
+X86InstrInfo::isStoreToStackSlotMO(const MachineInstr &MI, int &FrameIndex,
+                                   unsigned &MemBytes) const {
   if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
     if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
        isFrameOperand(MI, 0, FrameIndex))
-      return MI.getOperand(X86::AddrNumOperands).getReg();
-  return 0;
+      return &MI.getOperand(X86::AddrNumOperands);
+  return nullptr;
+}
+
+const MachineOperand *
+X86InstrInfo::isStoreToStackSlotMO(const MachineInstr &MI,
+                                   int &FrameIndex) const {
+  unsigned UnusedMemBytes;
+  return isStoreToStackSlotMO(MI, FrameIndex, UnusedMemBytes);
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                          int &FrameIndex,
+                                          unsigned &MemBytes) const {
+  const MachineOperand *MO = isStoreToStackSlotMO(MI, FrameIndex, MemBytes);
+  return MO != nullptr ? (unsigned)MO->getReg() : 0;
 }
 
 unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
@@ -9457,5 +9485,33 @@
   return It;
 }
 
+bool X86InstrInfo::isLegalToSpill2Reg(unsigned MemBits, unsigned Opcode) const {
+  switch (Opcode) {
+  // Skip instructions like `$k1 = KMOVWkm %stack.1` because replacing the
+  // stack slot with xmm0 results in an illegal instruction `movq %k1, %xmm0`.
+  case X86::KMOVBkm:
+  case X86::KMOVBmk:
+  case X86::KMOVWkm:
+  case X86::KMOVWmk:
+  case X86::KMOVDkm:
+  case X86::KMOVDmk:
+  case X86::KMOVQkm:
+  case X86::KMOVQmk:
+    return false;
+  }
+  switch (MemBits) {
+  case 64:
+  case 32:
+    return true;
+  }
+  return false;
+}
+
+bool X86InstrInfo::targetSupportsSpill2Reg(
+    const TargetSubtargetInfo *STI) const {
+  const X86Subtarget *X86STI = static_cast<const X86Subtarget *>(STI);
+  return X86STI->hasSSE41();
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
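
Illustration (a sketch, not part of the patch): generateCode() is still a stub
above, so the exact instruction selection is an assumption, but the rewrite
this pass is building toward looks roughly like this at the MIR level for a
64-bit GPR spill/reload pair on an SSE4.1 x86 target:

    # Before: spill $rax to a stack slot, reload it later.
    MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rax
    ...
    $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg

    # After (sketch): keep the value in a vector register instead.
    # MOV64toPQIrr/MOVPQIto64rr are plausible choices; the patch does not
    # commit to them yet.
    $xmm0 = MOV64toPQIrr $rax
    ...
    $rax = MOVPQIto64rr $xmm0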
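For reference, the contract of the new MO-returning hooks as implemented by
the X86 overrides above: given a frame store such as
`MOV32mr %stack.3, 1, $noreg, 0, $noreg, $eax`, a caller can expect the
following (hypothetical snippet):

    int FI;
    const MachineOperand *MO = TII->isStoreToStackSlotMO(MI, FI);
    // MO points at MI's $eax operand itself (not a copy), FI == 3, and
    // TRI->getRegSizeInBits(MO->getReg(), *MRI) == 32, a size that
    // X86InstrInfo::isLegalToSpill2Reg(32, X86::MOV32mr) accepts.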