Index: include/llvm/CodeGen/LiveRangeEdit.h
===================================================================
--- include/llvm/CodeGen/LiveRangeEdit.h
+++ include/llvm/CodeGen/LiveRangeEdit.h
@@ -40,6 +40,7 @@
 class MachineLoopInfo;
 class MachineOperand;
 class TargetInstrInfo;
+class TargetRegisterClass;
 class TargetRegisterInfo;
 class VirtRegMap;
@@ -177,8 +178,10 @@
     return makeArrayRef(NewRegs).slice(FirstNew);
   }
 
-  /// createFrom - Create a new virtual register based on OldReg.
-  unsigned createFrom(unsigned OldReg);
+  /// createFrom - Create a new virtual register based on OldReg. If RC is
+  /// specified then the register will have this class, otherwise the class
+  /// of OldReg is used.
+  unsigned createFrom(unsigned OldReg, const TargetRegisterClass *RC = nullptr);
 
   /// create - Create a new register with the same class and original slot as
   /// parent.
Index: include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- include/llvm/CodeGen/TargetInstrInfo.h
+++ include/llvm/CodeGen/TargetInstrInfo.h
@@ -880,6 +880,29 @@
            "TargetInstrInfo::loadRegFromStackSlot!");
   }
 
+  /// Return a register class that is appropriate for stack save/restore of the
+  /// given register class.
+  ///
+  /// For instance, Thumb1 does not provide instructions to directly
+  /// save/restore high registers. Storing a high register must be done by
+  /// first copying the value into a low register and then saving that
+  /// register. Similarly, a reload requires the reverse sequence. For this
+  /// case, the method returns the low-register class when given the
+  /// high-register class.
+  ///
+  /// This allows allocating a new register with the returned class and
+  /// inserting a COPY instruction before/after the store/load created by
+  /// storeRegToStackSlot()/loadRegFromStackSlot():
+  ///   %1:save-restore-class = COPY %0:original-class
+  ///   STR %1:save-restore-class, %stack.1
+  ///
+  ///   %1:save-restore-class = LDR %stack.1
+  ///   %0:original-class = COPY %1:save-restore-class
+  virtual const TargetRegisterClass *
+  getRegClassForStackSaveRestore(const TargetRegisterClass *RC) const {
+    return RC;
+  }
+
   /// This function is called for all pseudo instructions
   /// that remain after register allocation. Many pseudo instructions are
   /// created to help register allocation. This is the place to convert them
Index: lib/CodeGen/InlineSpiller.cpp
===================================================================
--- lib/CodeGen/InlineSpiller.cpp
+++ lib/CodeGen/InlineSpiller.cpp
@@ -222,7 +222,7 @@
   bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
                          MachineInstr *LoadMI = nullptr);
   void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
-  void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
+  void insertSpill(unsigned VReg, MachineBasicBlock::iterator MI);
 
   void spillAroundUses(unsigned Reg);
   void spillAll();
@@ -872,8 +872,21 @@
   MachineBasicBlock &MBB = *MI->getParent();
   MachineInstrSpan MIS(MI);
 
-  TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
-                           MRI.getRegClass(NewVReg), &TRI);
+  unsigned LoadReg = NewVReg;
+  const TargetRegisterClass &RC = *MRI.getRegClass(NewVReg);
+  const TargetRegisterClass &LoadRC = *TII.getRegClassForStackSaveRestore(&RC);
+  if (&RC != &LoadRC) {
+    LoadReg = Edit->createFrom(NewVReg, &LoadRC);
+    LLVM_DEBUG(dbgs() << "Using " << printReg(LoadReg, &TRI) << ":"
+                      << TRI.getRegClassName(&LoadRC)
+                      << " as an intermediate for the reload\n");
+  }
+
+  TII.loadRegFromStackSlot(MBB, MI, LoadReg, StackSlot, &LoadRC, &TRI);
+
+  if (&RC != &LoadRC)
+    BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(TargetOpcode::COPY), NewVReg)
+        .addReg(LoadReg, RegState::Kill);
 
   LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
@@ -897,31 +910,47 @@
 }
 
 /// insertSpill - Insert a spill of NewVReg after MI.
-void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
-                                MachineBasicBlock::iterator MI) {
+void InlineSpiller::insertSpill(unsigned NewVReg,
+                                MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
-  MachineInstrSpan MIS(MI);
+  MachineBasicBlock::iterator InsertMI = std::next(MI);
 
   bool IsRealSpill = true;
   if (isFullUndefDef(*MI)) {
     // Don't spill undef value.
     // Anything works for undef, in particular keeping the memory
     // uninitialized is a viable option and it saves code size and
     // run time.
-    BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
-        .addReg(NewVReg, getKillRegState(isKill));
+    BuildMI(MBB, InsertMI, MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
+        .addReg(NewVReg, RegState::Kill);
     IsRealSpill = false;
-  } else
-    TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
-                            MRI.getRegClass(NewVReg), &TRI);
+  } else {
+    unsigned StoreReg = NewVReg;
+    const TargetRegisterClass &RC = *MRI.getRegClass(NewVReg);
+    const TargetRegisterClass &StoreRC =
+        *TII.getRegClassForStackSaveRestore(&RC);
+    if (&RC != &StoreRC) {
+      StoreReg = Edit->createFrom(NewVReg, &StoreRC);
+      LLVM_DEBUG(dbgs() << "Using " << printReg(StoreReg, &TRI) << ":"
+                        << TRI.getRegClassName(&StoreRC)
+                        << " as an intermediate for the spill\n");
+
+      BuildMI(MBB, InsertMI, MI->getDebugLoc(), TII.get(TargetOpcode::COPY),
+              StoreReg)
+          .addReg(NewVReg, RegState::Kill);
+    }
+
+    TII.storeRegToStackSlot(MBB, InsertMI, StoreReg, /*isKill=*/true, StackSlot,
+                            &StoreRC, &TRI);
+  }
 
-  LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
+  LIS.InsertMachineInstrRangeInMaps(std::next(MI), InsertMI);
 
-  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
+  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), InsertMI, LIS,
                                                 "spill"));
   ++NumSpills;
   if (IsRealSpill)
-    HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
+    HSpiller.addToMergeableSpills(*std::prev(InsertMI), StackSlot, Original);
 }
 
 /// spillAroundUses - insert spill code around each use of Reg.
@@ -1021,7 +1050,7 @@
       // FIXME: Use a second vreg if instruction has no tied ops.
       if (RI.Writes)
         if (hasLiveDef)
-          insertSpill(NewVReg, true, MI);
+          insertSpill(NewVReg, MI);
     }
   }
Index: lib/CodeGen/LiveRangeEdit.cpp
===================================================================
--- lib/CodeGen/LiveRangeEdit.cpp
+++ lib/CodeGen/LiveRangeEdit.cpp
@@ -52,8 +52,11 @@
   return LI;
 }
 
-unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
-  unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+unsigned LiveRangeEdit::createFrom(unsigned OldReg,
+                                   const TargetRegisterClass *RC) {
+  if (RC == nullptr)
+    RC = MRI.getRegClass(OldReg);
+  unsigned VReg = MRI.createVirtualRegister(RC);
   if (VRM) {
     VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
   }
Index: lib/CodeGen/RegAllocFast.cpp
===================================================================
--- lib/CodeGen/RegAllocFast.cpp
+++ lib/CodeGen/RegAllocFast.cpp
@@ -154,6 +154,9 @@
     /// spilling all live registers. LiveRegMap entries should not be erased.
     bool isBulkSpilling = false;
 
+    /// Temporary virtreg used for indirect save/restore.
+    unsigned IndirectSpillVirtReg = 0;
+
     enum : unsigned {
       spillClean = 1,
       spillDirty = 100,
@@ -191,6 +194,7 @@
     void killVirtReg(unsigned VirtReg);
     void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
     void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+    unsigned getIndirectSpillVirtReg(const TargetRegisterClass *RC);
 
     void usePhysReg(MachineOperand &MO);
     void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
@@ -327,9 +331,39 @@
   const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);
   int FI = getStackSpaceFor(LRI->VirtReg, RC);
   LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n");
-  TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);
+
+  unsigned StoreReg = LR.PhysReg;
+  const TargetRegisterClass &StoreRC =
+      *TII->getRegClassForStackSaveRestore(&RC);
+  LiveRegMap::iterator StoreLRI;
+  if (&RC != &StoreRC) {
+    assert(&StoreRC == TII->getRegClassForStackSaveRestore(&StoreRC) &&
+           "Invalid regclass cascade for stack save");
+    StoreReg = getIndirectSpillVirtReg(&StoreRC);
+    LLVM_DEBUG(dbgs() << "Using " << printReg(StoreReg, TRI) << ":"
+                      << TRI->getRegClassName(&StoreRC)
+                      << " as an intermediate for the spill\n");
+
+    MachineInstr &CopyMI = *BuildMI(*MBB, MI, MI->getDebugLoc(),
+                                    TII->get(TargetOpcode::COPY), StoreReg)
+                                .addReg(LR.PhysReg, llvm::RegState::Kill);
+    StoreLRI = defineVirtReg(CopyMI, 0, StoreReg, 0);
+    setPhysReg(CopyMI, 0, StoreLRI->PhysReg);
+  }
+
+  TII->storeRegToStackSlot(*MBB, MI, StoreReg, SpillKill, FI, &StoreRC, TRI);
   ++NumStores; // Update statistics
 
+  if (&RC != &StoreRC) {
+    MachineInstr &StoreMI = *std::prev(MI);
+    for (unsigned I = 0, E = StoreMI.getNumOperands(); I != E; ++I) {
+      const MachineOperand &MO = StoreMI.getOperand(I);
+      if (MO.isReg() && MO.getReg() == StoreReg)
+        setPhysReg(StoreMI, I, StoreLRI->PhysReg);
+    }
+    killVirtReg(StoreLRI);
+  }
+
   // If this register is used by DBG_VALUE then insert new DBG_VALUE to
   // identify spilled location as the place to find corresponding variable's
   // value.
@@ -353,6 +387,14 @@
   killVirtReg(LRI);
 }
 
+unsigned RegAllocFast::getIndirectSpillVirtReg(const TargetRegisterClass *RC) {
+  if (IndirectSpillVirtReg == 0)
+    IndirectSpillVirtReg = MRI->createVirtualRegister(RC);
+  else
+    MRI->setRegClass(IndirectSpillVirtReg, RC);
+  return IndirectSpillVirtReg;
+}
+
 /// Spill all dirty virtregs without killing them.
 void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
   if (LiveVirtRegs.empty())
     return;
@@ -659,8 +701,39 @@
     int FrameIndex = getStackSpaceFor(VirtReg, RC);
     LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
                       << printReg(LRI->PhysReg, TRI) << "\n");
-    TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);
+
+    unsigned LoadReg = LRI->PhysReg;
+    const TargetRegisterClass &LoadRC =
+        *TII->getRegClassForStackSaveRestore(&RC);
+    if (&RC != &LoadRC) {
+      assert(&LoadRC == TII->getRegClassForStackSaveRestore(&LoadRC) &&
+             "Invalid regclass cascade for stack restore");
+      LoadReg = getIndirectSpillVirtReg(&LoadRC);
+      LLVM_DEBUG(dbgs() << "Using " << printReg(LoadReg, TRI) << ":"
+                        << TRI->getRegClassName(&LoadRC)
+                        << " as an intermediate for the reload\n");
+    }
+
+    TII->loadRegFromStackSlot(*MBB, MI, LoadReg, FrameIndex, &LoadRC, TRI);
     ++NumLoads;
+
+    if (&RC != &LoadRC) {
+      MachineInstr &LoadMI = *std::prev(MI.getIterator());
+      LiveRegMap::iterator LoadLRI = defineVirtReg(LoadMI, 0, LoadReg, 0);
+      for (unsigned I = 0, E = LoadMI.getNumOperands(); I != E; ++I) {
+        const MachineOperand &MO = LoadMI.getOperand(I);
+        if (MO.isReg() && MO.getReg() == LoadReg)
+          setPhysReg(LoadMI, I, LoadLRI->PhysReg);
+      }
+
+      MachineInstr &CopyMI =
+          *BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
+                   LRI->PhysReg)
+               .addReg(LoadReg, llvm::RegState::Kill);
+      setPhysReg(CopyMI, 1, LoadLRI->PhysReg);
+
+      killVirtReg(LoadLRI);
+    }
   } else if (LRI->Dirty) {
     if (isLastUseOfLocalReg(MO)) {
       LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n");
@@ -1102,7 +1175,10 @@
   // mapping for all virtual registers
   unsigned NumVirtRegs = MRI->getNumVirtRegs();
   StackSlotForVirtReg.resize(NumVirtRegs);
-  LiveVirtRegs.setUniverse(NumVirtRegs);
+
+  // Set the universe size to the number of virtual registers in the function
+  // plus one extra register for indirect spills.
+  LiveVirtRegs.setUniverse(NumVirtRegs + 1);
 
   // Loop over all of the basic blocks, eliminating virtual register references
   for (MachineBasicBlock &MBB : MF)
Index: lib/Target/ARM/Thumb1InstrInfo.h
===================================================================
--- lib/Target/ARM/Thumb1InstrInfo.h
+++ lib/Target/ARM/Thumb1InstrInfo.h
@@ -53,6 +53,9 @@
                            const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const override;
 
+  const TargetRegisterClass *
+  getRegClassForStackSaveRestore(const TargetRegisterClass *RC) const override;
+
   bool canCopyGluedNodeDuringSchedule(SDNode *N) const override;
 
 private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
Index: lib/Target/ARM/Thumb1InstrInfo.cpp
===================================================================
--- lib/Target/ARM/Thumb1InstrInfo.cpp
+++ lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -132,6 +132,13 @@
   }
 }
 
+const TargetRegisterClass *Thumb1InstrInfo::getRegClassForStackSaveRestore(
+    const TargetRegisterClass *RC) const {
+  if (ARM::hGPRRegClass.hasSubClassEq(RC))
+    return &ARM::tGPRRegClass;
+  return RC;
+}
+
 void Thumb1InstrInfo::expandLoadStackGuard(
     MachineBasicBlock::iterator MI) const {
   MachineFunction &MF = *MI->getParent()->getParent();
Index: test/CodeGen/Thumb/hgpr-spill-basic.mir
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/hgpr-spill-basic.mir
@@ -0,0 +1,74 @@
+# RUN: llc -run-pass regallocbasic %s -o - | FileCheck %s --check-prefix=CHECK-ALLOC
+# RUN: llc -run-pass regallocbasic,virtregrewriter %s -o - | FileCheck %s --check-prefix=CHECK-REWRITE
+
+# This test examines register allocation and spilling of a high register in
+# Thumb1 with the Basic Register Allocator. The test uses two consecutive
+# inline assembler expressions that both request an input variable to be
+# loaded into a high register. The first expression marks {r8, r9, r10, r11}
+# as clobbered, the second one marks {r12, lr} as such. The allocator cannot
+# choose the same register for both loads, so a spill occurs.
+#
+# The test checks that the InlineSpiller used by the Basic Register Allocator
+# implements the following:
+# * A high register in Thumb1 is spilled by inserting a copy to a low register
+#   and then saving that register.
+# * A high register in Thumb1 is restored by inserting a load into a low
+#   register and then a copy to the high register.
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.c"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv6m-none--eabi"
+
+  define dso_local void @constraint_h() {
+  entry:
+    %i = alloca i32, align 4
+    %0 = load i32, i32* %i, align 4
+    call void asm sideeffect "@ $0", "h,~{r8},~{r9},~{r10},~{r11}"(i32 %0)
+    call void asm sideeffect "@ $0", "h,~{r12},~{lr}"(i32 %0)
+    ret void
+  }
+
+...
+---
+name: constraint_h
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: hgpr }
+  - { id: 1, class: tgpr }
+stack:
+  - { id: 0, name: i, size: 4, alignment: 4, stack-id: 0, local-offset: -4 }
+body: |
+  bb.0.entry:
+    %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+    %0:hgpr = COPY %1
+    INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r8, implicit-def early-clobber $r9, implicit-def early-clobber $r10, implicit-def early-clobber $r11
+    INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r12, implicit-def early-clobber $lr
+    tBX_RET 14, $noreg
+
+...
+
+# CHECK-ALLOC: bb.0.entry:
+# CHECK-ALLOC-NEXT: %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+# CHECK-ALLOC-NEXT: %2:gpr = COPY %1
+# CHECK-ALLOC-NEXT: %3:tgpr = COPY %2
+# CHECK-ALLOC-NEXT: tSTRspi %3, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1)
+# CHECK-ALLOC-NEXT: %5:tgpr = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1)
+# CHECK-ALLOC-NEXT: %4:hgpr = COPY %5
+# CHECK-ALLOC-NEXT: INLINEASM &"@ $0", 1, 589833, %4, 12, implicit-def early-clobber $r8, implicit-def early-clobber $r9, implicit-def early-clobber $r10, implicit-def early-clobber $r11
+# CHECK-ALLOC-NEXT: %7:tgpr = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1)
+# CHECK-ALLOC-NEXT: %6:hgpr = COPY %7
+# CHECK-ALLOC-NEXT: INLINEASM &"@ $0", 1, 589833, %6, 12, implicit-def early-clobber $r12, implicit-def early-clobber $lr
+# CHECK-ALLOC-NEXT: tBX_RET 14, $noreg
+
+# CHECK-REWRITE: bb.0.entry:
+# CHECK-REWRITE-NEXT: renamable $r0 = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+# CHECK-REWRITE-NEXT: tSTRspi killed renamable $r0, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1)
+# CHECK-REWRITE-NEXT: renamable $r0 = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1)
+# CHECK-REWRITE-NEXT: renamable $r12 = COPY killed renamable $r0
+# CHECK-REWRITE-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r12, 12, implicit-def early-clobber $r8, implicit-def early-clobber $r9, implicit-def early-clobber $r10, implicit-def early-clobber $r11
+# CHECK-REWRITE-NEXT: renamable $r0 = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1)
+# CHECK-REWRITE-NEXT: renamable $r8 = COPY killed renamable $r0
+# CHECK-REWRITE-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12, implicit-def early-clobber $lr
+# CHECK-REWRITE-NEXT: tBX_RET 14, $noreg
Index: test/CodeGen/Thumb/hgpr-spill-fast.mir
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/hgpr-spill-fast.mir
@@ -0,0 +1,56 @@
+# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s
+
+# This test examines register allocation and spilling of high registers in
+# Thumb1 with the Fast Register Allocator. The test uses inline assembler that
+# requests an input variable to be loaded into a high register but at the same
+# time marks r12 as clobbered. The allocator initially satisfies the load
+# request by selecting r12 but then needs to spill this register when it
+# reaches the INLINEASM instruction and notices the clobber definition.
+#
+# The test checks that the Fast Register Allocator implements the following:
+# * A high register in Thumb1 is spilled by inserting a copy to a low register
+#   and then saving that register.
+# * A high register in Thumb1 is restored by inserting a load into a low
+#   register and then a copy to the high register.
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.c"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv6m-none--eabi"
+
+  define dso_local void @constraint_h() {
+  entry:
+    %i = alloca i32, align 4
+    %0 = load i32, i32* %i, align 4
+    call void asm sideeffect "@ $0", "h,~{r12}"(i32 %0)
+    ret void
+  }
+
+...
+---
+name: constraint_h
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: hgpr }
+  - { id: 1, class: tgpr }
+stack:
+  - { id: 0, name: i, size: 4, alignment: 4, stack-id: 0, local-offset: -4 }
+body: |
+  bb.0.entry:
+    %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+    %0:hgpr = COPY %1
+    INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r12
+    tBX_RET 14, $noreg
+
+...
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: renamable $r0 = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+# CHECK-NEXT: renamable $r12 = COPY killed renamable $r0
+# CHECK-NEXT: renamable $r0 = COPY killed $r12
+# CHECK-NEXT: tSTRspi killed renamable $r0, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1)
+# CHECK-NEXT: renamable $r0 = tLDRspi %stack.1, 0, 14, $noreg :: (load 4 from %stack.1)
+# CHECK-NEXT: $r8 = COPY killed renamable $r0
+# CHECK-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12
+# CHECK-NEXT: tBX_RET 14, $noreg
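
For reviewers, a condensed sketch of the usage pattern that the new
TargetInstrInfo::getRegClassForStackSaveRestore() hook is meant to enable,
distilled from the InlineSpiller changes above. The helper name
spillThroughIntermediate() and its parameter list are illustrative only and do
not exist in the tree; the calls to getRegClass(), createVirtualRegister(),
BuildMI() and storeRegToStackSlot() are the existing LLVM APIs used by the
patch.

  // Spill VReg to Slot, going through an intermediate register when the
  // target cannot store VReg's class directly (e.g. Thumb1 high registers).
  static void spillThroughIntermediate(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       unsigned VReg, int Slot,
                                       const TargetInstrInfo &TII,
                                       const TargetRegisterInfo &TRI,
                                       MachineRegisterInfo &MRI) {
    const TargetRegisterClass *RC = MRI.getRegClass(VReg);
    const TargetRegisterClass *StoreRC = TII.getRegClassForStackSaveRestore(RC);

    unsigned StoreReg = VReg;
    if (StoreRC != RC) {
      // Copy the value into a register of a class the target can store
      // directly, and store that register instead of the original one.
      StoreReg = MRI.createVirtualRegister(StoreRC);
      BuildMI(MBB, InsertPt, DebugLoc(), TII.get(TargetOpcode::COPY), StoreReg)
          .addReg(VReg, RegState::Kill);
    }

    // Emit the store with the (possibly substituted) register class.
    TII.storeRegToStackSlot(MBB, InsertPt, StoreReg, /*isKill=*/true, Slot,
                            StoreRC, &TRI);
  }

The restore path is symmetric: loadRegFromStackSlot() fills a register of the
save/restore class, followed by a COPY back into a register of the original
class.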