Index: include/llvm/ADT/SparseSet.h
===================================================================
--- include/llvm/ADT/SparseSet.h
+++ include/llvm/ADT/SparseSet.h
@@ -23,6 +23,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Allocator.h"
+#include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
@@ -128,7 +129,6 @@
   using KeyT = typename KeyFunctorT::argument_type;
   using DenseT = SmallVector<ValueT, 8>;
-  using size_type = unsigned;
   DenseT Dense;
   SparseT *Sparse = nullptr;
   unsigned Universe = 0;
@@ -141,6 +141,7 @@
   using const_reference = const ValueT &;
   using pointer = ValueT *;
   using const_pointer = const ValueT *;
+  using size_type = unsigned;
 
   SparseSet() = default;
   SparseSet(const SparseSet &) = delete;
@@ -154,17 +155,27 @@
   /// @param U Universe size. All object keys must be less than U.
   ///
   void setUniverse(unsigned U) {
-    // It's not hard to resize the universe on a non-empty set, but it doesn't
-    // seem like a likely use case, so we can add that code when we need it.
-    assert(empty() && "Can only resize universe on an empty map");
     // Hysteresis prevents needless reallocations.
     if (U >= Universe/4 && U <= Universe)
       return;
-    free(Sparse);
+    if (U > Universe)
+      U = std::max(U, 2 * Universe);
+
     // The Sparse array doesn't actually need to be initialized, so malloc
     // would be enough here, but that will cause tools like valgrind to
     // complain about branching on uninitialized data.
-    Sparse = static_cast<SparseT *>(safe_calloc(U, sizeof(SparseT)));
+    SparseT *S = static_cast<SparseT *>(safe_calloc(U, sizeof(SparseT)));
+
+    // Record already inserted elements in the new Sparse array.
+    for (unsigned i = 0, e = size(); i < e; i++) {
+      unsigned Idx = ValIndexOf(Dense[i]);
+      assert(Idx < U && "Index of an already inserted element is bigger than "
+                        "the new universe size");
+      S[Idx] = i;
+    }
+
+    free(Sparse);
+    Sparse = S;
     Universe = U;
   }
Index: include/llvm/CodeGen/LiveRangeEdit.h
===================================================================
--- include/llvm/CodeGen/LiveRangeEdit.h
+++ include/llvm/CodeGen/LiveRangeEdit.h
@@ -40,6 +40,7 @@
 class MachineLoopInfo;
 class MachineOperand;
 class TargetInstrInfo;
+class TargetRegisterClass;
 class TargetRegisterInfo;
 class VirtRegMap;
 
@@ -177,8 +178,10 @@
     return makeArrayRef(NewRegs).slice(FirstNew);
   }
 
-  /// createFrom - Create a new virtual register based on OldReg.
-  unsigned createFrom(unsigned OldReg);
+  /// createFrom - Create a new virtual register based on OldReg. If RC is
+  /// specified, the new register has that class; otherwise the class of
+  /// OldReg is used.
+  unsigned createFrom(unsigned OldReg, const TargetRegisterClass *RC = nullptr);
 
   /// create - Create a new register with the same class and original slot as
   /// parent.
Index: include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- include/llvm/CodeGen/TargetInstrInfo.h
+++ include/llvm/CodeGen/TargetInstrInfo.h
@@ -898,6 +898,29 @@
                      "TargetInstrInfo::loadRegFromStackSlot!");
   }
 
+  /// Return a register class that is appropriate for stack save/restore of
+  /// the given register class.
+  ///
+  /// For instance, Thumb1 does not provide instructions to directly
+  /// save/restore high registers. Storing a high register must be done by
+  /// first copying the value to a low register and then saving that register.
+  /// Similarly, a reload requires the reverse sequence. For this case, the
+  /// method returns the low-register class when given the high-register
+  /// class.
+  ///
+  /// This makes it possible to allocate a new register with the returned
+  /// class and insert a COPY instruction before/after the store/load created
+  /// by storeRegToStackSlot()/loadRegFromStackSlot():
+  ///   %1:save-restore-class = COPY %0:original-class
+  ///   STR %1:save-restore-class, %stack.1
+  ///
+  ///   %1:save-restore-class = LDR %stack.1
+  ///   %0:original-class = COPY %1:save-restore-class
+  virtual const TargetRegisterClass *
+  getRegClassForStackSaveRestore(const TargetRegisterClass *RC) const {
+    return RC;
+  }
+
   /// This function is called for all pseudo instructions
   /// that remain after register allocation. Many pseudo instructions are
   /// created to help register allocation. This is the place to convert them
Index: lib/CodeGen/InlineSpiller.cpp
===================================================================
--- lib/CodeGen/InlineSpiller.cpp
+++ lib/CodeGen/InlineSpiller.cpp
@@ -222,7 +222,7 @@
   bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
                          MachineInstr *LoadMI = nullptr);
   void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
-  void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
+  void insertSpill(unsigned VReg, MachineBasicBlock::iterator MI);
   void spillAroundUses(unsigned Reg);
   void spillAll();
 
@@ -872,8 +872,21 @@
   MachineBasicBlock &MBB = *MI->getParent();
   MachineInstrSpan MIS(MI);
 
-  TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
-                           MRI.getRegClass(NewVReg), &TRI);
+  unsigned LoadReg = NewVReg;
+  const TargetRegisterClass &RC = *MRI.getRegClass(NewVReg);
+  const TargetRegisterClass &LoadRC = *TII.getRegClassForStackSaveRestore(&RC);
+  if (&RC != &LoadRC) {
+    LoadReg = Edit->createFrom(NewVReg, &LoadRC);
+    LLVM_DEBUG(dbgs() << "Using " << printReg(LoadReg, &TRI) << ":"
+                      << TRI.getRegClassName(&LoadRC)
+                      << " as an intermediate for the reload\n");
+  }
+
+  TII.loadRegFromStackSlot(MBB, MI, LoadReg, StackSlot, &LoadRC, &TRI);
+
+  if (&RC != &LoadRC)
+    BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(TargetOpcode::COPY), NewVReg)
+        .addReg(LoadReg, RegState::Kill);
 
   LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
 
@@ -897,31 +910,47 @@
 }
 
 /// insertSpill - Insert a spill of NewVReg after MI.
-void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
-                                MachineBasicBlock::iterator MI) {
+void InlineSpiller::insertSpill(unsigned NewVReg,
+                                MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
-  MachineInstrSpan MIS(MI);
+  MachineBasicBlock::iterator InsertMI = std::next(MI);
 
   bool IsRealSpill = true;
   if (isFullUndefDef(*MI)) {
     // Don't spill undef value.
     // Anything works for undef, in particular keeping the memory
     // uninitialized is a viable option and it saves code size and
     // run time.
-    BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
-        .addReg(NewVReg, getKillRegState(isKill));
+    BuildMI(MBB, InsertMI, MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
+        .addReg(NewVReg, RegState::Kill);
     IsRealSpill = false;
-  } else
-    TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
-                            MRI.getRegClass(NewVReg), &TRI);
+  } else {
+    unsigned StoreReg = NewVReg;
+    const TargetRegisterClass &RC = *MRI.getRegClass(NewVReg);
+    const TargetRegisterClass &StoreRC =
+        *TII.getRegClassForStackSaveRestore(&RC);
+    if (&RC != &StoreRC) {
+      StoreReg = Edit->createFrom(NewVReg, &StoreRC);
+      LLVM_DEBUG(dbgs() << "Using " << printReg(StoreReg, &TRI) << ":"
+                        << TRI.getRegClassName(&StoreRC)
+                        << " as an intermediate for the spill\n");
+
+      BuildMI(MBB, InsertMI, MI->getDebugLoc(), TII.get(TargetOpcode::COPY),
+              StoreReg)
+          .addReg(NewVReg, RegState::Kill);
+    }
+
+    TII.storeRegToStackSlot(MBB, InsertMI, StoreReg, RegState::Kill, StackSlot,
+                            &StoreRC, &TRI);
+  }
 
-  LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
+  LIS.InsertMachineInstrRangeInMaps(std::next(MI), InsertMI);
 
-  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
+  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), InsertMI, LIS,
                                                 "spill"));
 
   ++NumSpills;
   if (IsRealSpill)
-    HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
+    HSpiller.addToMergeableSpills(*std::prev(InsertMI), StackSlot, Original);
 }
 
 /// spillAroundUses - insert spill code around each use of Reg.
@@ -1021,7 +1050,7 @@
       // FIXME: Use a second vreg if instruction has no tied ops.
       if (RI.Writes)
         if (hasLiveDef)
-          insertSpill(NewVReg, true, MI);
+          insertSpill(NewVReg, MI);
     }
   }
Index: lib/CodeGen/LiveRangeEdit.cpp
===================================================================
--- lib/CodeGen/LiveRangeEdit.cpp
+++ lib/CodeGen/LiveRangeEdit.cpp
@@ -52,8 +52,11 @@
   return LI;
 }
 
-unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
-  unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+unsigned LiveRangeEdit::createFrom(unsigned OldReg,
+                                   const TargetRegisterClass *RC) {
+  if (RC == nullptr)
+    RC = MRI.getRegClass(OldReg);
+  unsigned VReg = MRI.createVirtualRegister(RC);
   if (VRM) {
     VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
   }
Index: lib/CodeGen/RegAllocFast.cpp
===================================================================
--- lib/CodeGen/RegAllocFast.cpp
+++ lib/CodeGen/RegAllocFast.cpp
@@ -189,9 +189,8 @@
     void usePhysReg(MachineOperand &MO);
     void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
-                       RegState NewState);
+                       RegState NewState, bool IsUsedInInstr = false);
     unsigned calcSpillCost(MCPhysReg PhysReg) const;
-    void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
 
     LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
       return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
@@ -201,7 +200,9 @@
       return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
     }
 
-    void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+    bool allocVirtReg(MachineInstr &MI, unsigned VirtReg, unsigned Hint,
+                      MCPhysReg *PhysReg, bool IsUsedInInstr);
+    void assignVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
     MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
                             unsigned Hint);
     LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
@@ -215,6 +216,11 @@
     void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
                 MCPhysReg PhysReg);
 
+    unsigned createVirtReg(const TargetRegisterClass &RC);
+    void handleIntermediarySpill(MachineBasicBlock::iterator BeginMII,
+                                 MachineBasicBlock::iterator EndMII,
+                                 unsigned VirtReg);
+
     void dumpState();
   };
 
@@ -259,9 +265,34 @@
   LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
 
   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
-  TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
+  const TargetRegisterClass &StoreRC =
+      *TII->getRegClassForStackSaveRestore(&RC);
+
+  MachineBasicBlock::iterator PrevMII =
+      Before == MBB->begin() ? MBB->end() : std::prev(Before);
+  unsigned StoreReg = AssignedReg;
+  bool NeedsIntermediary = &RC != &StoreRC && !StoreRC.contains(StoreReg);
+  if (NeedsIntermediary) {
+    assert(&StoreRC == TII->getRegClassForStackSaveRestore(&StoreRC) &&
+           "Invalid regclass cascade for stack save");
+    StoreReg = createVirtReg(StoreRC);
+    LLVM_DEBUG(dbgs() << "Using " << printReg(StoreReg, TRI) << ":"
+                      << TRI->getRegClassName(&StoreRC)
+                      << " as an intermediary for the spill\n");
+
+    BuildMI(*MBB, Before, Before->getDebugLoc(), TII->get(TargetOpcode::COPY),
+            StoreReg)
+        .addReg(AssignedReg, llvm::RegState::Kill);
+  }
+
+  TII->storeRegToStackSlot(*MBB, Before, StoreReg, Kill, FI, &StoreRC, TRI);
   ++NumStores;
 
+  if (NeedsIntermediary)
+    handleIntermediarySpill(PrevMII == MBB->end() ? MBB->begin()
+                                                  : std::next(PrevMII),
+                            Before, StoreReg);
+
   // If this register is used by DBG_VALUE then insert new DBG_VALUE to
   // identify spilled location as the place to find corresponding variable's
   // value.
@@ -285,8 +316,32 @@
                     << printReg(PhysReg, TRI) << '\n');
   int FI = getStackSpaceFor(VirtReg);
   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
-  TII->loadRegFromStackSlot(*MBB, Before, PhysReg, FI, &RC, TRI);
+  const TargetRegisterClass &LoadRC = *TII->getRegClassForStackSaveRestore(&RC);
+
+  MachineBasicBlock::iterator PrevMII =
+      Before == MBB->begin() ? MBB->end() : std::prev(Before);
+  unsigned LoadReg = PhysReg;
+  bool NeedsIntermediary = &RC != &LoadRC && !LoadRC.contains(LoadReg);
+  if (NeedsIntermediary) {
+    assert(&LoadRC == TII->getRegClassForStackSaveRestore(&LoadRC) &&
+           "Invalid regclass cascade for stack restore");
+    LoadReg = createVirtReg(LoadRC);
+    LLVM_DEBUG(dbgs() << "Using " << printReg(LoadReg, TRI) << ":"
+                      << TRI->getRegClassName(&LoadRC)
+                      << " as an intermediary for the reload\n");
+  }
+
+  TII->loadRegFromStackSlot(*MBB, Before, LoadReg, FI, &LoadRC, TRI);
  ++NumLoads;
+
+  if (NeedsIntermediary) {
+    BuildMI(*MBB, Before, Before->getDebugLoc(), TII->get(TargetOpcode::COPY),
+            PhysReg)
+        .addReg(LoadReg, llvm::RegState::Kill);
+    handleIntermediarySpill(PrevMII == MBB->end() ? MBB->begin()
                                                  : std::next(PrevMII),
+                            Before, LoadReg);
+  }
 }
 
 /// Return true if MO is the only remaining reference to its virtual register,
@@ -456,8 +511,10 @@
 /// similar to defineVirtReg except the physreg is reserved instead of
 /// allocated.
 void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
-                                 MCPhysReg PhysReg, RegState NewState) {
-  markRegUsedInInstr(PhysReg);
+                                 MCPhysReg PhysReg, RegState NewState,
+                                 bool IsUsedInInstr) {
+  if (IsUsedInInstr)
+    markRegUsedInInstr(PhysReg);
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
   case regDisabled:
     break;
@@ -542,23 +599,10 @@
   return Cost;
 }
 
-/// This method updates local state so that we know that PhysReg is the
-/// proper container for VirtReg now. The physical register must not be used
-/// for anything else when this is called.
-void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { - unsigned VirtReg = LR.VirtReg; - LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " - << printReg(PhysReg, TRI) << '\n'); - assert(LR.PhysReg == 0 && "Already assigned a physreg"); - assert(PhysReg != 0 && "Trying to assign no register"); - LR.PhysReg = PhysReg; - setPhysRegState(PhysReg, VirtReg); -} - /// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) { - const unsigned VirtReg = LR.VirtReg; - +bool RegAllocFast::allocVirtReg(MachineInstr &MI, unsigned VirtReg, + unsigned Hint, MCPhysReg *OutPhysReg, + bool IsUsedInInstr) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -573,9 +617,9 @@ unsigned Cost = calcSpillCost(Hint); if (Cost < spillDirty) { if (Cost) - definePhysReg(MI, Hint, regFree); - assignVirtToPhysReg(LR, Hint); - return; + definePhysReg(MI, Hint, regFree, IsUsedInInstr); + *OutPhysReg = Hint; + return true; } } @@ -583,8 +627,8 @@ ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); for (MCPhysReg PhysReg : AllocationOrder) { if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { - assignVirtToPhysReg(LR, PhysReg); - return; + *OutPhysReg = PhysReg; + return true; } } @@ -599,8 +643,8 @@ LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); // Cost is 0 when all aliases are already disabled. if (Cost == 0) { - assignVirtToPhysReg(LR, PhysReg); - return; + *OutPhysReg = PhysReg; + return true; } if (Cost < BestCost) { BestReg = PhysReg; @@ -608,19 +652,37 @@ } } - if (!BestReg) { + if (BestReg) { + definePhysReg(MI, BestReg, regFree, IsUsedInInstr); + *OutPhysReg = BestReg; + return true; + } + + *OutPhysReg = *AllocationOrder.begin(); + return false; +} + +void RegAllocFast::assignVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) { + assert(LR.PhysReg == 0 && "Already assigned a physreg"); + + const unsigned VirtReg = LR.VirtReg; + MCPhysReg PhysReg; + bool Defined = allocVirtReg(MI, VirtReg, Hint, &PhysReg, true); + if (!Defined) { // Nothing we can do. Report an error and keep going with a bad allocation. if (MI.isInlineAsm()) MI.emitError("inline assembly requires more registers than available"); else MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AllocationOrder.begin(), regFree); - assignVirtToPhysReg(LR, *AllocationOrder.begin()); - return; + definePhysReg(MI, PhysReg, regFree); } - definePhysReg(MI, BestReg, regFree); - assignVirtToPhysReg(LR, BestReg); + // Update local state so that we know that PhysReg is the proper container for + // VirtReg now. + LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " + << printReg(PhysReg, TRI) << '\n'); + LR.PhysReg = PhysReg; + setPhysRegState(PhysReg, VirtReg); } /// Allocates a register for VirtReg and mark it as dirty. @@ -640,7 +702,7 @@ if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } - allocVirtReg(MI, *LRI, Hint); + assignVirtReg(MI, *LRI, Hint); } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. 
@@ -667,7 +729,7 @@ std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI.getOperand(OpNum); if (!LRI->PhysReg) { - allocVirtReg(MI, *LRI, Hint); + assignVirtReg(MI, *LRI, Hint); reload(MI, VirtReg, LRI->PhysReg); } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { @@ -734,6 +796,69 @@ return Dead; } +/// Create a new virtual register for use by the allocator. +unsigned RegAllocFast::createVirtReg(const TargetRegisterClass &RC) { + unsigned Reg = MRI->createVirtualRegister(&RC); + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + StackSlotForVirtReg.resize(NumVirtRegs); + LiveVirtRegs.setUniverse(NumVirtRegs); + return Reg; +} + +/// Process a spill/reload sequence that uses an intermediary register. The +/// method expects an instruction range implementing the spill/reload and id of +/// the new intermediary register. The intermediary is allocated to a physical +/// register and the instruction sequence is appropriately updated. +void RegAllocFast::handleIntermediarySpill(MachineBasicBlock::iterator BeginMII, + MachineBasicBlock::iterator EndMII, + unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + LLVM_DEBUG(dbgs() << "Allocating intermediary register " + << printReg(VirtReg, TRI) << ":" + << TRI->getRegClassName(&RC) + << " to a physical register\n"); + + // Allocate the intermediary virtual register to a physical register. + MCPhysReg InterPhysReg; + bool Defined = allocVirtReg(*BeginMII, VirtReg, 0, &InterPhysReg, false); + if (!Defined) { + // If an instruction uses a large number of registers (for instance, it is a + // complex INLINEASM), it is possible that all registers that can store the + // intermediary are already in use. In that case, one of these registers is + // temporarily spilled so the intermediary can be allocated. + // + // Note: The target must guarantee that an intermediary register can be + // successfully stored/loaded without modifying content of any of its super + // registers. + int FI = getStackSpaceFor(VirtReg); + + LLVM_DEBUG(dbgs() << "Temporarily spilling " << printReg(InterPhysReg, TRI) + << " to stack slot #" << FI + << " to allocate intermediary register " + << printReg(VirtReg, TRI) << ":" + << TRI->getRegClassName(&RC) << "\n"); + + // TODO Fix debug information for the spill (DBG_VALUE). + TII->storeRegToStackSlot(*MBB, BeginMII, InterPhysReg, true, FI, &RC, TRI); + ++NumStores; + + TII->loadRegFromStackSlot(*MBB, EndMII, InterPhysReg, FI, &RC, TRI); + ++NumLoads; + } + + // Update the intermediary register in the spill sequence. + for (MachineInstr &MI : make_range(BeginMII, EndMII)) { + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.getReg() == VirtReg) + setPhysReg(MI, I, InterPhysReg); + } + } +} + // Handles special instruction operand like early clobbers and tied ops when // there are additional physreg defines. void RegAllocFast::handleThroughOperands(MachineInstr &MI, @@ -1016,6 +1141,19 @@ } } + unsigned DefOpEnd = MI.getNumOperands(); + if (MI.isCall()) { + // Spill all virtregs before a call. This serves one purpose: If an + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots. 
+ // Note: although this is appealing to just consider all definitions + // as call-clobbered, this is not correct because some of those + // definitions may be used later on and we do not want to reuse + // those for virtual registers in between. + LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MI); + } + // Track registers defined by instruction - early clobbers and tied uses at // this point. UsedInInstr.clear(); @@ -1030,19 +1168,6 @@ } } - unsigned DefOpEnd = MI.getNumOperands(); - if (MI.isCall()) { - // Spill all virtregs before a call. This serves one purpose: If an - // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots. - // Note: although this is appealing to just consider all definitions - // as call-clobbered, this is not correct because some of those - // definitions may be used later on and we do not want to reuse - // those for virtual registers in between. - LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI); - } - // Third scan. // Allocate defs and collect dead defs. for (unsigned I = 0; I != DefOpEnd; ++I) { Index: lib/Target/ARM/Thumb1InstrInfo.h =================================================================== --- lib/Target/ARM/Thumb1InstrInfo.h +++ lib/Target/ARM/Thumb1InstrInfo.h @@ -53,6 +53,9 @@ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + const TargetRegisterClass * + getRegClassForStackSaveRestore(const TargetRegisterClass *RC) const override; + bool canCopyGluedNodeDuringSchedule(SDNode *N) const override; private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; Index: lib/Target/ARM/Thumb1InstrInfo.cpp =================================================================== --- lib/Target/ARM/Thumb1InstrInfo.cpp +++ lib/Target/ARM/Thumb1InstrInfo.cpp @@ -132,6 +132,13 @@ } } +const TargetRegisterClass *Thumb1InstrInfo::getRegClassForStackSaveRestore( + const TargetRegisterClass *RC) const { + if (ARM::hGPRRegClass.hasSubClassEq(RC)) + return &ARM::tGPRRegClass; + return RC; +} + void Thumb1InstrInfo::expandLoadStackGuard( MachineBasicBlock::iterator MI) const { MachineFunction &MF = *MI->getParent()->getParent(); Index: test/CodeGen/Thumb/hgpr-spill-basic.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb/hgpr-spill-basic.mir @@ -0,0 +1,74 @@ +# RUN: llc -run-pass regallocbasic %s -o - | FileCheck %s --check-prefix=CHECK-ALLOC +# RUN: llc -run-pass regallocbasic,virtregrewriter %s -o - | FileCheck %s --check-prefix=CHECK-REWRITE + +# This test examines register allocation and spilling of high register in Thumb1 +# with Basic Register Allocator. The test uses two consecutive inline assembler +# expressions that both request an input variable to be loaded in a high +# register. The first expression marks {r8, r9, r10, r11} as clobbered, the +# second one marks {r12, lr} as such. The allocator cannot choose the same +# register to load the variable and a spill occurs. +# +# The test checks that InlineSpiller used by Basic Register Allocator implements +# the following: +# * A high register in Thumb1 is spilled by inserting a copy to a low register +# and then saving that. +# * A high register in Thumb1 is restored by inserting a load to a low register +# and then a copy to the high register. 
+ +--- | + ; ModuleID = 'test.ll' + source_filename = "test.c" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-none--eabi" + + define dso_local void @constraint_h() { + entry: + %i = alloca i32, align 4 + %0 = load i32, i32* %i, align 4 + call void asm sideeffect "@ $0", "h,~{r8},~{r9},~{r10},~{r11}"(i32 %0) + call void asm sideeffect "@ $0", "h,~{r12},~{lr}"(i32 %0) + ret void + } + +... +--- +name: constraint_h +tracksRegLiveness: true +registers: + - { id: 0, class: hgpr } + - { id: 1, class: tgpr } +stack: + - { id: 0, name: i, size: 4, alignment: 4, stack-id: 0, local-offset: -4 } +body: | + bb.0.entry: + %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) + %0:hgpr = COPY %1 + INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r8, implicit-def early-clobber $r9, implicit-def early-clobber $r10, implicit-def early-clobber $r11 + INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r12, implicit-def early-clobber $lr + tBX_RET 14, $noreg + +... + +# CHECK-ALLOC: bb.0.entry: +# CHECK-ALLOC-NEXT: %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) +# CHECK-ALLOC-NEXT: %2:gpr = COPY %1 +# CHECK-ALLOC-NEXT: %3:tgpr = COPY %2 +# CHECK-ALLOC-NEXT: tSTRspi %3, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1) +# CHECK-ALLOC-NEXT: %5:tgpr = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1) +# CHECK-ALLOC-NEXT: %4:hgpr = COPY %5 +# CHECK-ALLOC-NEXT: INLINEASM &"@ $0", 1, 589833, %4, 12, implicit-def early-clobber $r8, implicit-def early-clobber $r9, implicit-def early-clobber $r10, implicit-def early-clobber $r11 +# CHECK-ALLOC-NEXT: %7:tgpr = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1) +# CHECK-ALLOC-NEXT: %6:hgpr = COPY %7 +# CHECK-ALLOC-NEXT: INLINEASM &"@ $0", 1, 589833, %6, 12, implicit-def early-clobber $r12, implicit-def early-clobber $lr +# CHECK-ALLOC-NEXT: tBX_RET 14, $noreg + +# CHECK-REWRITE: bb.0.entry: +# CHECK-REWRITE-NEXT: renamable $r0 = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) +# CHECK-REWRITE-NEXT: tSTRspi killed renamable $r0, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1) +# CHECK-REWRITE-NEXT: renamable $r0 = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1) +# CHECK-REWRITE-NEXT: renamable $r12 = COPY killed renamable $r0 +# CHECK-REWRITE-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r12, 12, implicit-def early-clobber $r8, implicit-def early-clobber $r9, implicit-def early-clobber $r10, implicit-def early-clobber $r11 +# CHECK-REWRITE-NEXT: renamable $r0 = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1) +# CHECK-REWRITE-NEXT: renamable $r8 = COPY killed renamable $r0 +# CHECK-REWRITE-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12, implicit-def early-clobber $lr +# CHECK-REWRITE-NEXT: tBX_RET 14, $noreg Index: test/CodeGen/Thumb/hgpr-spill-fast-all.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb/hgpr-spill-fast-all.mir @@ -0,0 +1,155 @@ +# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s + +# Check that Fast Register Allocator can succesfully spill all virtual registers +# before a call instruction, including any high registers. +# +# The test operates as follows: +# * Load a value in a high register which gets allocated to r12. +# * Load values in all low registers r0-r7. +# * Perform a call. 
The allocator spills all virtual registers prior calls and +# so it must be able to successfully store the values loaded in r12, r0-r7 to +# the stack. + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.c" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-none--eabi" + + define dso_local i32 @constraint_h() { + entry: + %ih = alloca i32, align 4 + %i0 = alloca i32, align 4 + %i1 = alloca i32, align 4 + %i2 = alloca i32, align 4 + %i3 = alloca i32, align 4 + %i4 = alloca i32, align 4 + %i5 = alloca i32, align 4 + %i6 = alloca i32, align 4 + %i7 = alloca i32, align 4 + %0 = load i32, i32* %ih, align 4 + %1 = load i32, i32* %i0, align 4 + %2 = load i32, i32* %i1, align 4 + %3 = load i32, i32* %i2, align 4 + %4 = load i32, i32* %i3, align 4 + %5 = load i32, i32* %i4, align 4 + %6 = load i32, i32* %i5, align 4 + %7 = load i32, i32* %i6, align 4 + %8 = load i32, i32* %i7, align 4 + call void @bar() + %add = add nsw i32 %0, %1 + %add1 = add nsw i32 %add, %2 + %add2 = add nsw i32 %add1, %3 + %add3 = add nsw i32 %add2, %4 + %add4 = add nsw i32 %add3, %5 + %add5 = add nsw i32 %add4, %6 + %add6 = add nsw i32 %add5, %7 + %add7 = add nsw i32 %add6, %8 + ret i32 %add7 + } + + declare void @bar() + +... +--- +name: constraint_h +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: hgpr } + - { id: 2, class: tgpr } + - { id: 3, class: tgpr } + - { id: 4, class: tgpr } + - { id: 5, class: tgpr } + - { id: 6, class: tgpr } + - { id: 7, class: tgpr } + - { id: 8, class: tgpr } + - { id: 9, class: tgpr } + - { id: 10, class: tgpr } + - { id: 11, class: tgpr } + - { id: 12, class: tgpr } + - { id: 13, class: tgpr } + - { id: 14, class: tgpr } + - { id: 15, class: tgpr } + - { id: 16, class: tgpr } + - { id: 17, class: tgpr } + - { id: 18, class: tgpr } +stack: + - { id: 0, name: ih, size: 4, alignment: 4, stack-id: 0, local-offset: -4 } + - { id: 1, name: i0, size: 4, alignment: 4, stack-id: 0, local-offset: -8 } + - { id: 2, name: i1, size: 4, alignment: 4, stack-id: 0, local-offset: -12 } + - { id: 3, name: i2, size: 4, alignment: 4, stack-id: 0, local-offset: -16 } + - { id: 4, name: i3, size: 4, alignment: 4, stack-id: 0, local-offset: -20 } + - { id: 5, name: i4, size: 4, alignment: 4, stack-id: 0, local-offset: -24 } + - { id: 6, name: i5, size: 4, alignment: 4, stack-id: 0, local-offset: -28 } + - { id: 7, name: i6, size: 4, alignment: 4, stack-id: 0, local-offset: -32 } + - { id: 8, name: i7, size: 4, alignment: 4, stack-id: 0, local-offset: -36 } +body: | + bb.0.entry: + %0:tgpr = tLDRspi %stack.0.ih, 0, 14, $noreg :: (dereferenceable load 4 from %ir.ih) + %1:hgpr = COPY %0 + %2:tgpr = tLDRspi %stack.1.i0, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i0) + %3:tgpr = tLDRspi %stack.2.i1, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i1) + %4:tgpr = tLDRspi %stack.3.i2, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i2) + %5:tgpr = tLDRspi %stack.4.i3, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i3) + %6:tgpr = tLDRspi %stack.5.i4, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i4) + %7:tgpr = tLDRspi %stack.6.i5, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i5) + %8:tgpr = tLDRspi %stack.7.i6, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i6) + %9:tgpr = tLDRspi %stack.8.i7, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i7) + tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + %10:tgpr = COPY %1 + %11:tgpr, $cpsr = nsw tADDrr %10, %2, 14, $noreg 
+ %12:tgpr, $cpsr = nsw tADDrr %11, %3, 14, $noreg + %13:tgpr, $cpsr = nsw tADDrr %12, %4, 14, $noreg + %14:tgpr, $cpsr = nsw tADDrr %13, %5, 14, $noreg + %15:tgpr, $cpsr = nsw tADDrr %14, %6, 14, $noreg + %16:tgpr, $cpsr = nsw tADDrr %15, %7, 14, $noreg + %17:tgpr, $cpsr = nsw tADDrr %16, %8, 14, $noreg + %18:tgpr, $cpsr = nsw tADDrr %17, %9, 14, $noreg + $r0 = COPY %18 + tBX_RET 14, $noreg, implicit $r0 + +... + +# CHECK: bb.0.entry: +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.0.ih, 0, 14, $noreg :: (dereferenceable load 4 from %ir.ih) +# CHECK-NEXT: renamable $r12 = COPY killed renamable $r0 +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.1.i0, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i0) +# CHECK-NEXT: renamable $r1 = tLDRspi %stack.2.i1, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i1) +# CHECK-NEXT: renamable $r2 = tLDRspi %stack.3.i2, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i2) +# CHECK-NEXT: renamable $r3 = tLDRspi %stack.4.i3, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i3) +# CHECK-NEXT: renamable $r4 = tLDRspi %stack.5.i4, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i4) +# CHECK-NEXT: renamable $r5 = tLDRspi %stack.6.i5, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i5) +# CHECK-NEXT: renamable $r6 = tLDRspi %stack.7.i6, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i6) +# CHECK-NEXT: renamable $r7 = tLDRspi %stack.8.i7, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i7) +# CHECK-NEXT: tSTRspi killed $r0, %stack.10, 0, 14, $noreg :: (store 4 into %stack.10) +# CHECK-NEXT: renamable $r0 = COPY killed $r12 +# CHECK-NEXT: tSTRspi killed renamable $r0, %stack.9, 0, 14, $noreg :: (store 4 into %stack.9) +# CHECK-NEXT: tSTRspi killed $r1, %stack.11, 0, 14, $noreg :: (store 4 into %stack.11) +# CHECK-NEXT: tSTRspi killed $r2, %stack.12, 0, 14, $noreg :: (store 4 into %stack.12) +# CHECK-NEXT: tSTRspi killed $r3, %stack.13, 0, 14, $noreg :: (store 4 into %stack.13) +# CHECK-NEXT: tSTRspi killed $r4, %stack.14, 0, 14, $noreg :: (store 4 into %stack.14) +# CHECK-NEXT: tSTRspi killed $r5, %stack.15, 0, 14, $noreg :: (store 4 into %stack.15) +# CHECK-NEXT: tSTRspi killed $r6, %stack.16, 0, 14, $noreg :: (store 4 into %stack.16) +# CHECK-NEXT: tSTRspi killed $r7, %stack.17, 0, 14, $noreg :: (store 4 into %stack.17) +# CHECK-NEXT: tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.9, 0, 14, $noreg :: (load 4 from %stack.9) +# CHECK-NEXT: $r12 = COPY killed renamable $r0 +# CHECK-NEXT: renamable $r0 = COPY killed renamable $r12 +# CHECK-NEXT: $r1 = tLDRspi %stack.10, 0, 14, $noreg :: (load 4 from %stack.10) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r1, 14, $noreg +# CHECK-NEXT: $r2 = tLDRspi %stack.11, 0, 14, $noreg :: (load 4 from %stack.11) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r2, 14, $noreg +# CHECK-NEXT: $r3 = tLDRspi %stack.12, 0, 14, $noreg :: (load 4 from %stack.12) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r3, 14, $noreg +# CHECK-NEXT: $r4 = tLDRspi %stack.13, 0, 14, $noreg :: (load 4 from %stack.13) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r4, 14, $noreg +# CHECK-NEXT: $r5 = tLDRspi %stack.14, 0, 14, $noreg :: (load 4 from %stack.14) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r5, 14, $noreg +# CHECK-NEXT: $r6 = tLDRspi 
%stack.15, 0, 14, $noreg :: (load 4 from %stack.15) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r6, 14, $noreg +# CHECK-NEXT: $r7 = tLDRspi %stack.16, 0, 14, $noreg :: (load 4 from %stack.16) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r7, 14, $noreg +# CHECK-NEXT: $r1 = tLDRspi %stack.17, 0, 14, $noreg :: (load 4 from %stack.17) +# CHECK-NEXT: renamable $r0, $cpsr = nsw tADDrr killed renamable $r0, killed renamable $r1, 14, $noreg +# CHECK-NEXT: tBX_RET 14, $noreg, implicit killed $r0 Index: test/CodeGen/Thumb/hgpr-spill-fast-tsave.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb/hgpr-spill-fast-tsave.mir @@ -0,0 +1,116 @@ +# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s + +# Check that when storing a high register to a stack slot using an intermediary, +# Fast Register Allocator is also able to spill a value in a register that it +# needs to allocate for the intermediary. +# +# The test operates as follows: +# * Physically define registers r0-r6 to make them reserved. +# * Load a value in a high register which gets allocated to r12. +# * Load a value in a low register which gets allocated to the remaining +# register r7. +# * Use INLINEASM that has r0-r6 and the value currently in r7 as inputs but +# marks r12 as clobbered. The allocator must store the current value in r12 to +# the stack. This requires the value in r7 to be also spilled and then +# reloaded. + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.c" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-none--eabi" + + define dso_local i32 @constraint_h() { + entry: + %i0 = alloca i32, align 4 + %i1 = alloca i32, align 4 + %i2 = alloca i32, align 4 + %i3 = alloca i32, align 4 + %i4 = alloca i32, align 4 + %i5 = alloca i32, align 4 + %i6 = alloca i32, align 4 + %ih = alloca i32, align 4 + %i7 = alloca i32, align 4 + %0 = load i32, i32* %i0, align 4 + %1 = load i32, i32* %i1, align 4 + %2 = load i32, i32* %i2, align 4 + %3 = load i32, i32* %i3, align 4 + %4 = load i32, i32* %i4, align 4 + %5 = load i32, i32* %i5, align 4 + %6 = load i32, i32* %i6, align 4 + %7 = load i32, i32* %ih, align 4 + %8 = load i32, i32* %i7, align 4 + call void asm sideeffect "@ $0 $1 $2 $3 $4 $5 $6 $7", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},r,~{r12}"(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %8) + ret i32 %8 + } + +... 
+--- +name: constraint_h +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: tgpr } + - { id: 2, class: tgpr } + - { id: 3, class: tgpr } + - { id: 4, class: tgpr } + - { id: 5, class: tgpr } + - { id: 6, class: tgpr } + - { id: 7, class: tgpr } + - { id: 8, class: hgpr } + - { id: 9, class: tgpr } +stack: + - { id: 0, name: i0, size: 4, alignment: 4, stack-id: 0, local-offset: -4 } + - { id: 1, name: i1, size: 4, alignment: 4, stack-id: 0, local-offset: -8 } + - { id: 2, name: i2, size: 4, alignment: 4, stack-id: 0, local-offset: -12 } + - { id: 3, name: i3, size: 4, alignment: 4, stack-id: 0, local-offset: -16 } + - { id: 4, name: i4, size: 4, alignment: 4, stack-id: 0, local-offset: -20 } + - { id: 5, name: i5, size: 4, alignment: 4, stack-id: 0, local-offset: -24 } + - { id: 6, name: i6, size: 4, alignment: 4, stack-id: 0, local-offset: -28 } + - { id: 7, name: ih, size: 4, alignment: 4, stack-id: 0, local-offset: -32 } + - { id: 8, name: i7, size: 4, alignment: 4, stack-id: 0, local-offset: -36 } +body: | + bb.0.entry: + %0:tgpr = tLDRspi %stack.0.i0, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i0) + %1:tgpr = tLDRspi %stack.1.i1, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i1) + %2:tgpr = tLDRspi %stack.2.i2, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i2) + %3:tgpr = tLDRspi %stack.3.i3, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i3) + %4:tgpr = tLDRspi %stack.4.i4, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i4) + %5:tgpr = tLDRspi %stack.5.i5, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i5) + %6:tgpr = tLDRspi %stack.6.i6, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i6) + $r0 = COPY %0 + $r1 = COPY %1 + $r2 = COPY %2 + $r3 = COPY %3 + $r4 = COPY %4 + $r5 = COPY %5 + $r6 = COPY %6 + %7:tgpr = tLDRspi %stack.7.ih, 0, 14, $noreg :: (dereferenceable load 4 from %ir.ih) + %8:hgpr = COPY %7 + %9:tgpr = tLDRspi %stack.8.i7, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i7) + INLINEASM &"@ $0 $1 $2 $3 $4 $5 $6 $7", 1, 9, $r0, 9, $r1, 9, $r2, 9, $r3, 9, $r4, 9, $r5, 9, $r6, 655369, %9, 12, implicit-def early-clobber $r12 + $r0 = COPY %8 + tBX_RET 14, $noreg, implicit $r0 + +... 
+ +# CHECK: bb.0.entry: +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.0.i0, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i0) +# CHECK-NEXT: renamable $r1 = tLDRspi %stack.1.i1, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i1) +# CHECK-NEXT: renamable $r2 = tLDRspi %stack.2.i2, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i2) +# CHECK-NEXT: renamable $r3 = tLDRspi %stack.3.i3, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i3) +# CHECK-NEXT: renamable $r4 = tLDRspi %stack.4.i4, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i4) +# CHECK-NEXT: renamable $r5 = tLDRspi %stack.5.i5, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i5) +# CHECK-NEXT: renamable $r6 = tLDRspi %stack.6.i6, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i6) +# CHECK-NEXT: renamable $r7 = tLDRspi %stack.7.ih, 0, 14, $noreg :: (dereferenceable load 4 from %ir.ih) +# CHECK-NEXT: renamable $r12 = COPY killed renamable $r7 +# CHECK-NEXT: renamable $r7 = tLDRspi %stack.8.i7, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i7) +# CHECK-NEXT: tSTRspi killed $r7, %stack.10, 0, 14, $noreg :: (store 4 into %stack.10) +# CHECK-NEXT: renamable $r7 = COPY killed $r12 +# CHECK-NEXT: tSTRspi killed renamable $r7, %stack.9, 0, 14, $noreg :: (store 4 into %stack.9) +# CHECK-NEXT: $r7 = tLDRspi %stack.10, 0, 14, $noreg :: (load 4 from %stack.10) +# CHECK-NEXT: INLINEASM &"@ $0 $1 $2 $3 $4 $5 $6 $7", 1, 9, killed $r0, 9, killed $r1, 9, killed $r2, 9, killed $r3, 9, killed $r4, 9, killed $r5, 9, killed $r6, 655369, killed renamable $r7, 12, implicit-def early-clobber $r12 +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.9, 0, 14, $noreg :: (load 4 from %stack.9) +# CHECK-NEXT: $r12 = COPY killed renamable $r0 +# CHECK-NEXT: $r0 = COPY killed renamable $r12 +# CHECK-NEXT: tBX_RET 14, $noreg, implicit killed $r0 Index: test/CodeGen/Thumb/hgpr-spill-fast-tsave2.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb/hgpr-spill-fast-tsave2.mir @@ -0,0 +1,116 @@ +# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s + +# Check that when storing a high register to a stack slot using an intermediary, +# Fast Register Allocator is able to insert a temporary spill of a register that +# it needs for the intermediary if no such register can be normally allocated. +# +# The test operates as follows: +# * Physically define registers r0-r6 to make them reserved. +# * Load a value in a high register which gets allocated to r12. +# * Physically define the remaining low register r7 to make it reserved. +# * Use INLINEASM that has r0-r7 as inputs but marks r12 as clobbered. The +# allocator must store the current value in r12 to the stack. This requires a +# temporary spill of one of the low registers that are already used by +# INLINEASM. 
+ +--- | + ; ModuleID = 'test.ll' + source_filename = "test.c" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-none--eabi" + + define dso_local i32 @constraint_h() { + entry: + %i0 = alloca i32, align 4 + %i1 = alloca i32, align 4 + %i2 = alloca i32, align 4 + %i3 = alloca i32, align 4 + %i4 = alloca i32, align 4 + %i5 = alloca i32, align 4 + %i6 = alloca i32, align 4 + %ih = alloca i32, align 4 + %i7 = alloca i32, align 4 + %0 = load i32, i32* %i0, align 4 + %1 = load i32, i32* %i1, align 4 + %2 = load i32, i32* %i2, align 4 + %3 = load i32, i32* %i3, align 4 + %4 = load i32, i32* %i4, align 4 + %5 = load i32, i32* %i5, align 4 + %6 = load i32, i32* %i6, align 4 + %7 = load i32, i32* %ih, align 4 + %8 = load i32, i32* %i7, align 4 + call void asm sideeffect "@ $0 $1 $2 $3 $4 $5 $6 $7", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7},~{r12}"(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %8) + ret i32 %8 + } + +... +--- +name: constraint_h +tracksRegLiveness: true +registers: + - { id: 0, class: tgpr } + - { id: 1, class: tgpr } + - { id: 2, class: tgpr } + - { id: 3, class: tgpr } + - { id: 4, class: tgpr } + - { id: 5, class: tgpr } + - { id: 6, class: tgpr } + - { id: 7, class: tgpr } + - { id: 8, class: hgpr } + - { id: 9, class: tgpr } +stack: + - { id: 0, name: i0, size: 4, alignment: 4, stack-id: 0, local-offset: -4 } + - { id: 1, name: i1, size: 4, alignment: 4, stack-id: 0, local-offset: -8 } + - { id: 2, name: i2, size: 4, alignment: 4, stack-id: 0, local-offset: -12 } + - { id: 3, name: i3, size: 4, alignment: 4, stack-id: 0, local-offset: -16 } + - { id: 4, name: i4, size: 4, alignment: 4, stack-id: 0, local-offset: -20 } + - { id: 5, name: i5, size: 4, alignment: 4, stack-id: 0, local-offset: -24 } + - { id: 6, name: i6, size: 4, alignment: 4, stack-id: 0, local-offset: -28 } + - { id: 7, name: ih, size: 4, alignment: 4, stack-id: 0, local-offset: -32 } + - { id: 8, name: i7, size: 4, alignment: 4, stack-id: 0, local-offset: -36 } +body: | + bb.0.entry: + %0:tgpr = tLDRspi %stack.0.i0, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i0) + %1:tgpr = tLDRspi %stack.1.i1, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i1) + %2:tgpr = tLDRspi %stack.2.i2, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i2) + %3:tgpr = tLDRspi %stack.3.i3, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i3) + %4:tgpr = tLDRspi %stack.4.i4, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i4) + %5:tgpr = tLDRspi %stack.5.i5, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i5) + %6:tgpr = tLDRspi %stack.6.i6, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i6) + $r0 = COPY %0 + $r1 = COPY %1 + $r2 = COPY %2 + $r3 = COPY %3 + $r4 = COPY %4 + $r5 = COPY %5 + $r6 = COPY %6 + %7:tgpr = tLDRspi %stack.7.ih, 0, 14, $noreg :: (dereferenceable load 4 from %ir.ih) + %8:hgpr = COPY %7 + %9:tgpr = tLDRspi %stack.8.i7, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i7) + $r7 = COPY %9 + INLINEASM &"@ $0 $1 $2 $3 $4 $5 $6 $7", 1, 9, $r0, 9, $r1, 9, $r2, 9, $r3, 9, $r4, 9, $r5, 9, $r6, 9, $r7, 12, implicit-def early-clobber $r12 + $r0 = COPY %8 + tBX_RET 14, $noreg, implicit $r0 + +... 
+ +# CHECK: bb.0.entry: +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.0.i0, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i0) +# CHECK-NEXT: renamable $r1 = tLDRspi %stack.1.i1, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i1) +# CHECK-NEXT: renamable $r2 = tLDRspi %stack.2.i2, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i2) +# CHECK-NEXT: renamable $r3 = tLDRspi %stack.3.i3, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i3) +# CHECK-NEXT: renamable $r4 = tLDRspi %stack.4.i4, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i4) +# CHECK-NEXT: renamable $r5 = tLDRspi %stack.5.i5, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i5) +# CHECK-NEXT: renamable $r6 = tLDRspi %stack.6.i6, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i6) +# CHECK-NEXT: renamable $r7 = tLDRspi %stack.7.ih, 0, 14, $noreg :: (dereferenceable load 4 from %ir.ih) +# CHECK-NEXT: renamable $r12 = COPY killed renamable $r7 +# CHECK-NEXT: renamable $r7 = tLDRspi %stack.8.i7, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i7) +# CHECK-NEXT: tSTRspi killed $r0, %stack.10, 0, 14, $noreg :: (store 4 into %stack.10) +# CHECK-NEXT: renamable $r0 = COPY killed $r12 +# CHECK-NEXT: tSTRspi killed renamable $r0, %stack.9, 0, 14, $noreg :: (store 4 into %stack.9) +# CHECK-NEXT: $r0 = tLDRspi %stack.10, 0, 14, $noreg :: (load 4 from %stack.10) +# CHECK-NEXT: INLINEASM &"@ $0 $1 $2 $3 $4 $5 $6 $7", 1, 9, killed $r0, 9, killed $r1, 9, killed $r2, 9, killed $r3, 9, killed $r4, 9, killed $r5, 9, killed $r6, 9, killed $r7, 12, implicit-def early-clobber $r12 +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.9, 0, 14, $noreg :: (load 4 from %stack.9) +# CHECK-NEXT: $r12 = COPY killed renamable $r0 +# CHECK-NEXT: $r0 = COPY killed renamable $r12 +# CHECK-NEXT: tBX_RET 14, $noreg, implicit killed $r0 Index: test/CodeGen/Thumb/hgpr-spill-fast.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb/hgpr-spill-fast.mir @@ -0,0 +1,56 @@ +# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s + +# This test examines register allocation and spilling of high registers in +# Thumb1 with Fast Register Allocator. The test uses inline assembler that +# requests an input variable to be loaded in a high register but at the same +# time has r12 marked as clobbered. The allocator initially satisfies the load +# request by selecting r12 but then needs to spill this register when it reaches +# the INLINEASM instruction and notices its clobber definition. +# +# The test checks that Fast Register Allocator implements the following: +# * A high register in Thumb1 is spilled by inserting a copy to a low register +# and then saving that. +# * A high register in Thumb1 is restored by inserting a load to a low register +# and then a copy to the high register. + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.c" + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv6m-none--eabi" + + define dso_local void @constraint_h() { + entry: + %i = alloca i32, align 4 + %0 = load i32, i32* %i, align 4 + call void asm sideeffect "@ $0", "h,~{r12}"(i32 %0) + ret void + } + +... 
+--- +name: constraint_h +tracksRegLiveness: true +registers: + - { id: 0, class: hgpr } + - { id: 1, class: tgpr } +stack: + - { id: 0, name: i, size: 4, alignment: 4, stack-id: 0, local-offset: -4 } +body: | + bb.0.entry: + %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) + %0:hgpr = COPY %1 + INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r12 + tBX_RET 14, $noreg + +... + +# CHECK: bb.0.entry: +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) +# CHECK-NEXT: renamable $r12 = COPY killed renamable $r0 +# CHECK-NEXT: renamable $r0 = COPY killed $r12 +# CHECK-NEXT: tSTRspi killed renamable $r0, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1) +# CHECK-NEXT: renamable $r0 = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1) +# CHECK-NEXT: $r8 = COPY killed renamable $r0 +# CHECK-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12 +# CHECK-NEXT: tBX_RET 14, $noreg
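Note (illustrative, not part of the patch): the core idea behind the getRegClassForStackSaveRestore() hook is that a spiller first asks the target for a register class it can actually store/reload directly and, if that class differs from the register's own class, routes the value through an intermediate COPY. A minimal sketch of such a spill helper is shown below; the helper name spillViaIntermediate and its parameter list are made up for illustration, while the MachineRegisterInfo, TargetInstrInfo and BuildMI calls are the same ones used by InlineSpiller::insertSpill in this patch.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"

using namespace llvm;

// Illustrative sketch: spill VReg to stack slot Slot, going through an
// intermediate register of a class the target can store directly (for
// example a Thumb1 low register when VReg is in the hGPR class).
static void spillViaIntermediate(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator InsertPt,
                                 unsigned VReg, int Slot,
                                 MachineRegisterInfo &MRI,
                                 const TargetInstrInfo &TII,
                                 const TargetRegisterInfo &TRI) {
  const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
  const TargetRegisterClass &StoreRC = *TII.getRegClassForStackSaveRestore(&RC);

  unsigned StoreReg = VReg;
  if (&RC != &StoreRC) {
    // The target cannot store RC directly: copy the value into a register of
    // the save/restore class first.
    StoreReg = MRI.createVirtualRegister(&StoreRC);
    BuildMI(MBB, InsertPt, DebugLoc(), TII.get(TargetOpcode::COPY), StoreReg)
        .addReg(VReg, RegState::Kill);
  }

  // Store the (possibly copied) value using the class the target can handle.
  TII.storeRegToStackSlot(MBB, InsertPt, StoreReg, /*isKill=*/true, Slot,
                          &StoreRC, &TRI);
}

The reload direction mirrors this sequence: load into a register of the save/restore class and COPY the result back into the original class, which is exactly the shape checked by the CHECK-REWRITE lines in hgpr-spill-basic.mir above.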